@@ -40,22 +40,22 @@ void THCTensor_(spaddcdiv)(THCState *state, THCTensor *r_, THCTensor *t, real va
   THError("WARNING: Sparse Cuda Tensor op spaddcdiv is not implemented");
 }

-void THCSTensor_(spaddmm)(THCState *state, THCTensor *r_, real beta, THCTensor *t, real alpha, THCSTensor *sparse, THCTensor *dense) {
+void THCSTensor_(spaddmm)(THCState *state, THCTensor *r_, real beta, THCTensor *t, real alpha, THCSTensor *sparse_, THCTensor *dense) {
 #if defined(THCS_REAL_IS_FLOAT) || defined(THCS_REAL_IS_DOUBLE)
-  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 1, 4, sparse, r_, t, dense));
+  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 1, 4, sparse_, r_, t, dense));
   THCudaIntTensor *csr;
   THCIndexTensor *indices;
   THCTensor *values, *r__, *dense_;

-  THArgCheck(sparse->nDimensionI == 2, 2,
-      "matrices expected, got %dD tensor", sparse->nDimensionI);
-  THArgCheck(sparse->nDimensionV == 0, 2,
-      "scalar values expected, got %dD values", sparse->nDimensionV);
+  THArgCheck(sparse_->nDimensionI == 2, 2,
+      "matrices expected, got %dD tensor", sparse_->nDimensionI);
+  THArgCheck(sparse_->nDimensionV == 0, 2,
+      "scalar values expected, got %dD values", sparse_->nDimensionV);
   THArgCheck(dense->nDimension == 2, 2,
       "matrices expected, got %dD tensor", dense->nDimension);

-  long m = THCSTensor_(size)(state, sparse, 0);
-  long k = THCSTensor_(size)(state, sparse, 1);
+  long m = THCSTensor_(size)(state, sparse_, 0);
+  long k = THCSTensor_(size)(state, sparse_, 1);
   long n = THCTensor_(size)(state, dense, 1);

   THCTensor_(resize2d)(state, r_, m, n);
@@ -67,7 +67,7 @@ void THCSTensor_(spaddmm)(THCState *state, THCTensor *r_, real beta, THCTensor *
   THArgCheck(THCTensor_(size)(state, dense, 0) == k, 3,
       "Expected dim 0 size %d, got %d", k, THCTensor_(size)(state, dense, 0));

-  THCSTensor_(coalesce)(state, sparse);
+  THCSTensor *sparse = THCSTensor_(newCoalesce)(state, sparse_);

   long nnz = THCSTensor_(nnz)(state, sparse);
   indices = THCSTensor_(newIndices)(state, sparse);
@@ -146,6 +146,7 @@ void THCSTensor_(spaddmm)(THCState *state, THCTensor *r_, real beta, THCTensor *
   THCIndexTensor_(free)(state, rowIndices);
   THCIndexTensor_(free)(state, colIndices);
   THCTensor_(free)(state, values);
+  THCSTensor_(free)(state, sparse);
 #else
   THError("unimplemented data type");
 #endif
@@ -156,40 +157,42 @@ void THCSTensor_(sspaddmm)(THCState *state, THCSTensor *r_, real beta, THCSTenso
   // TODO Write some kernels
 }

-void THCSTensor_(hspmm)(THCState *state, THCSTensor *r_, real alpha, THCSTensor *sparse, THCTensor *dense) {
+void THCSTensor_(hspmm)(THCState *state, THCSTensor *r_, real alpha, THCSTensor *sparse_, THCTensor *dense) {
 #if CUDA_VERSION >= 7000
   THCThrustAllocator thrustAlloc(state);
 #define THRUST_EXEC(fn, ...) fn(thrust::cuda::par(thrustAlloc).on(THCState_getCurrentStream(state)), ##__VA_ARGS__)
 #else
 #define THRUST_EXEC(fn, ...) fn(##__VA_ARGS__)
 #endif

-  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 2, 3, r_, sparse, dense));
+  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 2, 3, r_, sparse_, dense));

-  THArgCheck(sparse->nDimensionI == 2, 3,
-      "matrices expected, got %dD tensor", sparse->nDimensionI);
-  THArgCheck(sparse->nDimensionV == 0, 3,
-      "scalar values expected, got %dD values", sparse->nDimensionV);
+  THArgCheck(sparse_->nDimensionI == 2, 3,
+      "matrices expected, got %dD tensor", sparse_->nDimensionI);
+  THArgCheck(sparse_->nDimensionV == 0, 3,
+      "scalar values expected, got %dD values", sparse_->nDimensionV);
   THArgCheck(dense->nDimension == 2, 4,
       "matrices expected, got %dD tensor", dense->nDimension);

-  long m = THCSTensor_(size)(state, sparse, 0);
-  long k = THCSTensor_(size)(state, sparse, 1);
+  long m = THCSTensor_(size)(state, sparse_, 0);
+  long k = THCSTensor_(size)(state, sparse_, 1);
   long n = THCTensor_(size)(state, dense, 1);

   THArgCheck(THCTensor_(size)(state, dense, 0) == k, 4,
       "Expected dim 0 size %d, got %d", k, THCTensor_(size)(state, dense, 0));
   long size[2] = {m, n};
   THCSTensor_(rawResize)(state, r_, 1, 1, size);

-  THCSTensor_(coalesce)(state, sparse);
+  THCSTensor *sparse = THCSTensor_(newCoalesce)(state, sparse_);

   long nnz = THCSTensor_(nnz)(state, sparse);
   THCIndexTensor *indices = THCIndexTensor_(newWithSize2d)(state, 1, nnz);
   // create values in column-major format to avoid copying in spaddmm
   THCTensor *values = THCTensor_(newWithSize2d)(state, n, nnz);
   THCTensor_(transpose)(state, values, NULL, 0, 1);

+  // why does sparse need to be cloned? If this is really necessary maybe we
+  // need to fuse this with newCoalesce
   THCSTensor *newSparse = THCSTensor_(newClone)(state, sparse);
   THCIndexTensor *spIndices = THCSTensor_(newIndices)(state, newSparse);
   THCIndexTensor *dstIndices = THCIndexTensor_(newSelect)(state, spIndices, 0, 0);
@@ -206,6 +209,7 @@ void THCSTensor_(hspmm)(THCState *state, THCSTensor *r_, real alpha, THCSTensor
   THCSTensor_(free)(state, newSparse);
   THCIndexTensor_(free)(state, spIndices);
   THCIndexTensor_(free)(state, dstIndices);
+  THCSTensor_(free)(state, sparse);

 #undef THRUST_EXEC
 }
@@ -348,8 +352,6 @@ void THCSTensor_(cadd)(THCState *state, THCSTensor *r_, THCSTensor *t, real valu
   if (!THCSTensor_(isSameSizeAs)(state, t, src)) {
     THError("cadd operands have incompatible sizes or dimension types");
   }
-  THCSTensor_(coalesce)(state, t);
-  THCSTensor_(coalesce)(state, src);

   if (src->nnz == 0) {
     THCSTensor_(copy)(state, r_, t);
@@ -399,13 +401,13 @@ void THCSTensor_(csub)(THCState *state, THCSTensor *r_, THCSTensor *t, real valu
   THCSTensor_(cadd)(state, r_, t, ScalarNegate<real>::to(value), src);
 }

-void THCSTensor_(cmul)(THCState *state, THCSTensor *r_, THCSTensor *t, THCSTensor *src) {
-  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 3, 3, r_, t, src));
-  if (!THCSTensor_(isSameSizeAs)(state, t, src)) {
+void THCSTensor_(cmul)(THCState *state, THCSTensor *r_, THCSTensor *t_, THCSTensor *src_) {
+  THCAssertSameGPU(THCSTensor_(checkGPU)(state, 3, 3, r_, t_, src_));
+  if (!THCSTensor_(isSameSizeAs)(state, t_, src_)) {
     THError("cmul operands have incompatible sizes or dimension types");
   }
-  THCSTensor_(coalesce)(state, t);
-  THCSTensor_(coalesce)(state, src);
+  THCSTensor *t = THCSTensor_(newCoalesce)(state, t_);
+  THCSTensor *src = THCSTensor_(newCoalesce)(state, src_);

   if (t->nnz == 0 || src->nnz == 0) {
     THCSTensor_(zero)(state, r_);
@@ -453,6 +455,8 @@ void THCSTensor_(cmul)(THCState *state, THCSTensor *r_, THCSTensor *t, THCSTenso
   THCTensor_(free)(state, t_values_);
   THCIndexTensor_(free)(state, s_indices_);
   THCTensor_(free)(state, s_values_);
+  THCSTensor_(free)(state, t);
+  THCSTensor_(free)(state, src);
 }

 #if defined(THCS_REAL_IS_FLOAT) || defined(THCS_REAL_IS_DOUBLE)
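Note on the pattern this diff introduces: each in-place THCSTensor_(coalesce) call is replaced by THCSTensor_(newCoalesce), which leaves the caller's argument unmodified and returns a fresh coalesced tensor that the function must later release with THCSTensor_(free) (hence the free calls added at the end of spaddmm, hspmm, and cmul). A minimal standalone C sketch of that caller-owns ownership pattern is below; the sparse_tensor type and helper names are hypothetical stand-ins for illustration, not the actual THCS API, and the duplicate-index merging that real coalescing performs is elided.

#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for a sparse tensor; not the real THCS struct. */
typedef struct { int nnz; long *indices; float *values; } sparse_tensor;

/* Mirrors the newCoalesce contract: the input is left untouched and a new
 * tensor is returned; the caller owns (and must free) the result. */
sparse_tensor *new_coalesce(const sparse_tensor *in) {
  sparse_tensor *out = malloc(sizeof(*out));
  out->nnz = in->nnz;  /* real code would merge duplicate indices here */
  out->indices = malloc(sizeof(long) * in->nnz);
  out->values = malloc(sizeof(float) * in->nnz);
  memcpy(out->indices, in->indices, sizeof(long) * in->nnz);
  memcpy(out->values, in->values, sizeof(float) * in->nnz);
  return out;
}

void sparse_free(sparse_tensor *t) {
  free(t->indices);
  free(t->values);
  free(t);
}

/* Caller pattern matching the diff: coalesce into a temporary, use it, free it. */
void some_op(const sparse_tensor *src_) {
  sparse_tensor *src = new_coalesce(src_);  /* src_ is never mutated */
  /* ... operate on the coalesced src ... */
  sparse_free(src);                         /* release the temporary */
}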