Skip to content

Commit 9d8d7e4

Browse files
committed
Replace GETC with SAMPLE_WEIGHT
1 parent 41cbfde commit 9d8d7e4

File tree

4 files changed

+31
-28
lines changed

4 files changed

+31
-28
lines changed

sklearn/svm/liblinear.pxd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ cdef extern from "src/liblinear/linear.h":
2020
cdef extern from "src/liblinear/liblinear_helper.c":
2121
void copy_w(void *, model *, int)
2222
parameter *set_parameter(int, double, double, int, char *, char *, int, int, double)
23-
problem *set_problem (char *, char *, np.npy_intp *, double, char *)
23+
problem *set_problem (char *, char *, np.npy_intp *, double, double *)
2424
problem *csr_set_problem (char *values, np.npy_intp *n_indices,
2525
char *indices, np.npy_intp *n_indptr, char *indptr, char *Y,
26-
np.npy_intp n_features, double bias, char *)
26+
np.npy_intp n_features, double bias, double *)
2727

2828
model *set_model(parameter *, char *, np.npy_intp *, char *, double)
2929

sklearn/svm/liblinear.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ cimport liblinear
1111
np.import_array()
1212

1313

14-
def train_wrap(X, np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
14+
def train_wrap(X, np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
1515
bint is_sparse, int solver_type, double eps, double bias,
1616
double C, np.ndarray[np.float64_t, ndim=1] class_weight,
1717
int max_iter, unsigned random_seed, double epsilon,
18-
np.ndarray[np.float64_t, ndim=1] sample_weight):
18+
np.ndarray[np.float64_t, ndim=1, mode='c'] sample_weight):
1919
cdef parameter *param
2020
cdef problem *problem
2121
cdef model *model

sklearn/svm/src/liblinear/liblinear_helper.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ static struct feature_node **csr_to_sparse(double *values,
124124
return sparse;
125125
}
126126

127-
struct problem * set_problem(char *X,char *Y, npy_intp *dims, double bias, char* sample_weight)
127+
struct problem * set_problem(char *X,char *Y, npy_intp *dims, double bias, double* sample_weight)
128128
{
129129
struct problem *problem;
130130
/* not performant but simple */
@@ -141,7 +141,7 @@ struct problem * set_problem(char *X,char *Y, npy_intp *dims, double bias, char*
141141
problem->y = (double *) Y;
142142
problem->x = dense_to_sparse((double *) X, dims, bias);
143143
problem->bias = bias;
144-
problem->sample_weight = (double *) sample_weight;
144+
problem->sample_weight = sample_weight;
145145
if (problem->x == NULL) {
146146
free(problem);
147147
return NULL;
@@ -152,7 +152,7 @@ struct problem * set_problem(char *X,char *Y, npy_intp *dims, double bias, char*
152152

153153
struct problem * csr_set_problem (char *values, npy_intp *n_indices,
154154
char *indices, npy_intp *n_indptr, char *indptr, char *Y,
155-
npy_intp n_features, double bias, char *sample_weight) {
155+
npy_intp n_features, double bias, double *sample_weight) {
156156

157157
struct problem *problem;
158158
problem = malloc (sizeof (struct problem));
@@ -169,7 +169,7 @@ struct problem * csr_set_problem (char *values, npy_intp *n_indices,
169169
problem->x = csr_to_sparse((double *) values, n_indices, (int *) indices,
170170
n_indptr, (int *) indptr, bias, n_features);
171171
problem->bias = bias;
172-
problem->sample_weight = (double *) sample_weight;
172+
problem->sample_weight = sample_weight;
173173

174174
if (problem->x == NULL) {
175175
free(problem);

sklearn/svm/src/liblinear/linear.cpp

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,8 +1244,9 @@ static int solve_l2r_l1l2_svr(
12441244
// See Algorithm 5 of Yu et al., MLJ 2010
12451245

12461246

1247-
#define GETC(i) upper_bound[y[i]+1]*sample_weight[i]
1248-
// To support weights for instances (sample_weight*class_weight), use GETC(i) (i)
1247+
#define SAMPLE_WEIGHT(i) upper_bound[y[i]+1]*sample_weight[i]
1248+
// To support weights for instances, use SAMPLE_WEIGHT(i)
1249+
// Each instance is weighted by sample_weight*class_weight
12491250

12501251
int solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn,
12511252
int max_iter)
@@ -1276,12 +1277,12 @@ int solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, dou
12761277
}
12771278

12781279
// Initial alpha can be set here. Note that
1279-
// 0 < alpha[i] < GETC(i)
1280-
// alpha[2*i] + alpha[2*i+1] = GETC(i)
1280+
// 0 < alpha[i] < SAMPLE_WEIGHT(i)
1281+
// alpha[2*i] + alpha[2*i+1] = SAMPLE_WEIGHT(i)
12811282
for(i=0; i<l; i++)
12821283
{
1283-
alpha[2*i] = min(0.001*GETC(i), 1e-8);
1284-
alpha[2*i+1] = GETC(i) - alpha[2*i];
1284+
alpha[2*i] = min(0.001*SAMPLE_WEIGHT(i), 1e-8);
1285+
alpha[2*i+1] = SAMPLE_WEIGHT(i) - alpha[2*i];
12851286
}
12861287

12871288
for(i=0; i<w_size; i++)
@@ -1313,7 +1314,7 @@ int solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, dou
13131314
{
13141315
i = index[s];
13151316
schar yi = y[i];
1316-
double C = GETC(i);
1317+
double C = SAMPLE_WEIGHT(i);
13171318
double ywTx = 0, xisq = xTx[i];
13181319
feature_node *xi = prob->x[i];
13191320
while (xi->index != -1)
@@ -1396,7 +1397,7 @@ int solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, dou
13961397
v *= 0.5;
13971398
for(i=0; i<l; i++)
13981399
v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1399-
- GETC(i) * log(GETC(i));
1400+
- SAMPLE_WEIGHT(i) * log(SAMPLE_WEIGHT(i));
14001401
info("Objective value = %lf\n", v);
14011402

14021403
delete [] xTx;
@@ -1705,9 +1706,10 @@ static int solve_l1r_l2_svc(
17051706
// solution will be put in w
17061707
//
17071708
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
1708-
#undef GETC
1709-
#define GETC(i) C[y[i]+1]*sample_weight[i]
1710-
// To support weights (sample_weight*class_weight) for instances, use GETC(i) (i)
1709+
#undef SAMPLE_WEIGHT
1710+
#define SAMPLE_WEIGHT(i) C[y[i]+1]*sample_weight[i]
1711+
// To support weights for instances, use SAMPLE_WEIGHT(i)
1712+
// Each instance is weighted by (class_weight*sample_weight)
17111713

17121714
static int solve_l1r_lr(
17131715
const problem *prob_col, double *w, double eps,
@@ -1777,16 +1779,16 @@ static int solve_l1r_lr(
17771779
double val = x->value;
17781780
exp_wTx[ind] += w[j]*val;
17791781
if(y[ind] == -1)
1780-
xjneg_sum[j] += GETC(ind)*val;
1782+
xjneg_sum[j] += SAMPLE_WEIGHT(ind)*val;
17811783
x++;
17821784
}
17831785
}
17841786
for(j=0; j<l; j++)
17851787
{
17861788
exp_wTx[j] = exp(exp_wTx[j]);
17871789
double tau_tmp = 1/(1+exp_wTx[j]);
1788-
tau[j] = GETC(j)*tau_tmp;
1789-
D[j] = GETC(j)*exp_wTx[j]*tau_tmp*tau_tmp;
1790+
tau[j] = SAMPLE_WEIGHT(j)*tau_tmp;
1791+
D[j] = SAMPLE_WEIGHT(j)*exp_wTx[j]*tau_tmp*tau_tmp;
17901792
}
17911793

17921794
while(newton_iter < max_newton_iter)
@@ -1962,7 +1964,7 @@ static int solve_l1r_lr(
19621964
negsum_xTd = 0;
19631965
for(int i=0; i<l; i++)
19641966
if(y[i] == -1)
1965-
negsum_xTd += GETC(i)*xTd[i];
1967+
negsum_xTd += SAMPLE_WEIGHT(i)*xTd[i];
19661968

19671969
int num_linesearch;
19681970
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
@@ -1973,7 +1975,7 @@ static int solve_l1r_lr(
19731975
{
19741976
double exp_xTd = exp(xTd[i]);
19751977
exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
1976-
cond += GETC(i)*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
1978+
cond += SAMPLE_WEIGHT(i)*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
19771979
}
19781980

19791981
if(cond <= 0)
@@ -1985,8 +1987,8 @@ static int solve_l1r_lr(
19851987
{
19861988
exp_wTx[i] = exp_wTx_new[i];
19871989
double tau_tmp = 1/(1+exp_wTx[i]);
1988-
tau[i] = GETC(i)*tau_tmp;
1989-
D[i] = GETC(i)*exp_wTx[i]*tau_tmp*tau_tmp;
1990+
tau[i] = SAMPLE_WEIGHT(i)*tau_tmp;
1991+
D[i] = SAMPLE_WEIGHT(i)*exp_wTx[i]*tau_tmp*tau_tmp;
19901992
}
19911993
break;
19921994
}
@@ -2053,9 +2055,9 @@ static int solve_l1r_lr(
20532055
}
20542056
for(j=0; j<l; j++)
20552057
if(y[j] == 1)
2056-
v += GETC(j)*log(1+1/exp_wTx[j]);
2058+
v += SAMPLE_WEIGHT(j)*log(1+1/exp_wTx[j]);
20572059
else
2058-
v += GETC(j)*log(1+exp_wTx[j]);
2060+
v += SAMPLE_WEIGHT(j)*log(1+exp_wTx[j]);
20592061

20602062
info("Objective value = %lf\n", v);
20612063
info("#nonzeros/#features = %d/%d\n", nnz, w_size);
@@ -2496,6 +2498,7 @@ model* train(const problem *prob, const parameter *param)
24962498
free(sub_prob.x);
24972499
free(sub_prob.y);
24982500
free(weighted_C);
2501+
delete[] sample_weight;
24992502
}
25002503
return model_;
25012504
}

0 commit comments

Comments
 (0)