Skip to content

Commit 6c27359

Browse files
Jokeren authored and soumith committed
THTensorApply Counter compress
1 parent e475c82 commit 6c27359

File tree

2 files changed

+41
-32
lines changed

2 files changed

+41
-32
lines changed

THTensorApply.h

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@
374374
#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \
375375
{ \
376376
TYPE *TENSOR##_data = NULL; \
377-
long *TENSOR##_counter = NULL; \
377+
long *TENSOR##_counter = NULL, *TENSOR##_dims = NULL, *TENSOR##_strides = NULL; \
378378
long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i; \
379379
int TH_TENSOR_APPLY_hasFinished = 0; \
380380
\
@@ -384,63 +384,68 @@
384384
{ \
385385
TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \
386386
\
387-
/* what is the first stride (ignore first dims=1)? */ \
388-
/* it will be used for offset updates while looping through the largest contiguous section */ \
389-
for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \
390-
{ \
391-
if(TENSOR->size[TENSOR##_dim] != 1) \
392-
break; \
393-
} \
394-
TENSOR##_stride = (TENSOR##_dim == -1 ? 0 : TENSOR->stride[TENSOR##_dim]); \
395-
\
396-
/* what is the largest contiguous section? size will store the size of this section */ \
397-
TENSOR##_size = 1; \
398-
for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \
387+
/* find the dimension of contiguous regions */ \
388+
TENSOR##_dim = 1; \
389+
for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \
399390
{ \
400-
if(TENSOR->size[TENSOR##_dim] != 1) \
401-
{ \
402-
if(TENSOR->stride[TENSOR##_dim] == TENSOR##_size) \
403-
TENSOR##_size *= TENSOR->size[TENSOR##_dim]; \
404-
else \
405-
break; \
406-
} \
391+
if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1]) \
392+
TENSOR##_dim++; \
407393
} \
408394
\
409395
/* allocate the dims, strides and counter arrays with one entry per merged */ \
410396
/* region of adjacent dimensions. Note that if the tensor is contiguous, then */ \
411397
/* all dimensions merge into one region and each array has a single entry. */ \
412398
\
399+
TENSOR##_dims = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \
400+
TENSOR##_strides = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \
401+
TENSOR##_counter = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \
402+
long dim_index = TENSOR##_dim-1; \
403+
TENSOR##_dims[dim_index] = TENSOR->size[TENSOR->nDimension-1]; \
404+
TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \
405+
/* what is the first stride? */ \
413406
/* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
414407
/* storage is given by storage_offset + (i * j), where i is the stride */ \
415408
/* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \
416-
TENSOR##_counter = (long*)THAlloc(sizeof(long)*(TENSOR##_dim+1)); \
417-
for(TENSOR##_i = 0; TENSOR##_i <= TENSOR##_dim; TENSOR##_i++) \
409+
for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \
418410
TENSOR##_counter[TENSOR##_i] = 0; \
411+
} \
412+
for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \
413+
if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1]) { \
414+
TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_dims[dim_index]; \
415+
} else { \
416+
--dim_index; \
417+
TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i]; \
418+
TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR##_i]; \
419+
} \
420+
} \
421+
/* size of the innermost merged section; it bounds the inner loop below */ \
422+
TENSOR##_size = TENSOR##_dims[TENSOR##_dim-1]; \
423+
/* stride of the innermost merged section; it is used for offset updates while looping through it */ \
424+
TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \
419425
} \
426+
\
420427
\
421428
while(!TH_TENSOR_APPLY_hasFinished) \
422429
{ \
423-
/* Loop through the contiguous section of the Tensor */ \
430+
/* Loop through the inner most region of the Tensor */ \
424431
for(TENSOR##_i = 0; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \
425432
{ \
426433
CODE \
427434
} \
428435
\
436+
if(TENSOR##_dim == 1) \
437+
break; \
429438
\
430-
/* Handle corner case where the entire Tensor was contiguous */ \
431-
if(TENSOR##_dim == -1) \
432-
break; \
433-
\
434439
/* Reset pointer to beginning of loop */ \
435440
TENSOR##_data -= TENSOR##_i*TENSOR##_stride; \
436-
for(TENSOR##_i = TENSOR##_dim; TENSOR##_i >= 0; TENSOR##_i--) \
441+
for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \
437442
{ \
438443
TENSOR##_counter[TENSOR##_i]++; \
439444
\
440445
/* Jump ahead by the stride of this dimension */ \
441-
TENSOR##_data += TENSOR->stride[TENSOR##_i]; \
446+
TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \
442447
\
443-
if(TENSOR##_counter[TENSOR##_i] == TENSOR->size[TENSOR##_i]) \
448+
if(TENSOR##_counter[TENSOR##_i] == TENSOR##_dims[TENSOR##_i]) \
444449
{ \
445450
if(TENSOR##_i == 0) \
446451
{ \
@@ -450,7 +455,7 @@
450455
else \
451456
{ \
452457
/* Reset the pointer to the beginning of the chunk defined by this dimension */ \
453-
TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR->stride[TENSOR##_i]; \
458+
TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \
454459
TENSOR##_counter[TENSOR##_i] = 0; \
455460
} \
456461
} \
@@ -459,6 +464,10 @@
459464
} \
460465
} \
461466
THFree(TENSOR##_counter); \
467+
THFree(TENSOR##_strides); \
468+
THFree(TENSOR##_dims); \
462469
}
470+
//printf("dim %ld counter %ld stride %ld\n", TENSOR##_i, TENSOR##_counter[TENSOR##_i], TENSOR##_strides[TENSOR##_i]);
471+
//printf("address %ld\n", TENSOR##_data-TENSOR->storage->data+TENSOR->storageOffset);
463472

464473
#endif

generic/THTensorMath.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void THTensor_(fill)(THTensor *r_, real value)
2727
THVector_(fill)(rp+i, value, i_end-i);
2828
}
2929
} else {
30-
TH_TENSOR_APPLY(real, r_, THVector_(fill)(r__data, value, r__size); break;);
30+
TH_TENSOR_APPLY(real, r_, *r__data = value;);
3131
}
3232
}
3333

0 commit comments

Comments
 (0)