|
374 | 374 | #define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \ |
375 | 375 | { \ |
376 | 376 | TYPE *TENSOR##_data = NULL; \ |
377 | | - long *TENSOR##_counter = NULL; \ |
| 377 | + long *TENSOR##_counter = NULL, *TENSOR##_dims = NULL, *TENSOR##_strides = NULL; \ |
378 | 378 | long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i; \ |
379 | 379 | int TH_TENSOR_APPLY_hasFinished = 0; \ |
380 | 380 | \ |
|
384 | 384 | { \ |
385 | 385 | TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \ |
386 | 386 | \ |
387 | | - /* what is the first stride (ignore first dims=1)? */ \ |
388 | | - /* it will be used for offset updates while looping through the largest contiguous section */ \ |
389 | | - for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \ |
390 | | - { \ |
391 | | - if(TENSOR->size[TENSOR##_dim] != 1) \ |
392 | | - break; \ |
393 | | - } \ |
394 | | - TENSOR##_stride = (TENSOR##_dim == -1 ? 0 : TENSOR->stride[TENSOR##_dim]); \ |
395 | | -\ |
396 | | - /* what is the largest contiguous section? size will store the size of this section */ \ |
397 | | - TENSOR##_size = 1; \ |
398 | | - for(TENSOR##_dim = TENSOR->nDimension-1; TENSOR##_dim >= 0; TENSOR##_dim--) \ |
| 387 | + /* count the contiguous regions: start at 1 and add one for every dimension boundary that breaks contiguity */ \ |
| 388 | + TENSOR##_dim = 1; \ |
| 389 | + for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \ |
399 | 390 | { \ |
400 | | - if(TENSOR->size[TENSOR##_dim] != 1) \ |
401 | | - { \ |
402 | | - if(TENSOR->stride[TENSOR##_dim] == TENSOR##_size) \ |
403 | | - TENSOR##_size *= TENSOR->size[TENSOR##_dim]; \ |
404 | | - else \ |
405 | | - break; \ |
406 | | - } \ |
| 391 | + if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1]) \ |
| 392 | +TENSOR##_dim++; \ |
407 | 393 | } \ |
408 | 394 | \ |
409 | 395 | /* allocate three arrays of TENSOR##_dim elements (sizes, strides and counters), */ \ |
410 | 396 | /* one entry per contiguous region counted above. Note that if the tensor is */ \ |
411 | 397 | /* contiguous, then TENSOR##_dim is 1 and the counters are never advanced. */ \ |
412 | 398 | \ |
| 399 | + TENSOR##_dims = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \ |
| 400 | + TENSOR##_strides = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \ |
| 401 | + TENSOR##_counter = (long*)THAlloc(sizeof(long)*(TENSOR##_dim)); \ |
| 402 | + long dim_index = TENSOR##_dim-1; \ |
| 403 | + TENSOR##_dims[dim_index] = TENSOR->size[TENSOR->nDimension-1]; \ |
| 404 | + TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \ |
| 405 | + /* what is the first stride? */ \ |
413 | 406 | /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \ |
414 | 407 | /* storage is given by storage_offset + (i * j), where i is the stride */ \ |
415 | 408 | /* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \ |
416 | | - TENSOR##_counter = (long*)THAlloc(sizeof(long)*(TENSOR##_dim+1)); \ |
417 | | - for(TENSOR##_i = 0; TENSOR##_i <= TENSOR##_dim; TENSOR##_i++) \ |
| 409 | + for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \ |
418 | 410 | TENSOR##_counter[TENSOR##_i] = 0; \ |
| 411 | + } \ |
| 412 | + for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \ |
| 413 | + if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1]) { \ |
| 414 | + TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_dims[dim_index]; \ |
| 415 | + } else { \ |
| 416 | + --dim_index; \ |
| 417 | +TENSOR##_dims[dim_index] = TENSOR->size[TENSOR##_i]; \ |
| 418 | +TENSOR##_strides[dim_index] = TENSOR->stride[TENSOR##_i]; \ |
| 419 | + } \ |
| 420 | + } \ |
| 421 | + /* size of the innermost region: the number of iterations of the inner loop */ \ |
| 422 | + TENSOR##_size = TENSOR##_dims[TENSOR##_dim-1]; \ |
| 423 | + /* stride of the innermost region: used for offset updates while looping through it */ \ |
| 424 | + TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \ |
419 | 425 | } \ |
| 426 | +\ |
420 | 427 | \ |
421 | 428 | while(!TH_TENSOR_APPLY_hasFinished) \ |
422 | 429 | { \ |
423 | | - /* Loop through the contiguous section of the Tensor */ \ |
| 430 | + /* Loop through the inner most region of the Tensor */ \ |
424 | 431 | for(TENSOR##_i = 0; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \ |
425 | 432 | { \ |
426 | 433 | CODE \ |
427 | 434 | } \ |
428 | 435 | \ |
| 436 | + if(TENSOR##_dim == 1) \ |
| 437 | + break; \ |
429 | 438 | \ |
430 | | - /* Handle corner case where the entire Tensor was contiguous */ \ |
431 | | - if(TENSOR##_dim == -1) \ |
432 | | - break; \ |
433 | | - \ |
434 | 439 | /* Reset pointer to beginning of loop */ \ |
435 | 440 | TENSOR##_data -= TENSOR##_i*TENSOR##_stride; \ |
436 | | - for(TENSOR##_i = TENSOR##_dim; TENSOR##_i >= 0; TENSOR##_i--) \ |
| 441 | + for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \ |
437 | 442 | { \ |
438 | 443 | TENSOR##_counter[TENSOR##_i]++; \ |
439 | 444 | \ |
440 | 445 | /* Jump ahead by the stride of this dimension */ \ |
441 | | - TENSOR##_data += TENSOR->stride[TENSOR##_i]; \ |
| 446 | + TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \ |
442 | 447 | \ |
443 | | - if(TENSOR##_counter[TENSOR##_i] == TENSOR->size[TENSOR##_i]) \ |
| 448 | + if(TENSOR##_counter[TENSOR##_i] == TENSOR##_dims[TENSOR##_i]) \ |
444 | 449 | { \ |
445 | 450 | if(TENSOR##_i == 0) \ |
446 | 451 | { \ |
|
450 | 455 | else \ |
451 | 456 | { \ |
452 | 457 | /* Reset the pointer to the beginning of the chunk defined by this dimension */ \ |
453 | | - TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR->stride[TENSOR##_i]; \ |
| 458 | + TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \ |
454 | 459 | TENSOR##_counter[TENSOR##_i] = 0; \ |
455 | 460 | } \ |
456 | 461 | } \ |
|
459 | 464 | } \ |
460 | 465 | } \ |
461 | 466 | THFree(TENSOR##_counter); \ |
| 467 | + THFree(TENSOR##_strides); \ |
| 468 | + THFree(TENSOR##_dims); \ |
462 | 469 | } |
| 470 | +//printf("dim %ld counter %ld stride %ld\n", TENSOR##_i, TENSOR##_counter[TENSOR##_i], TENSOR##_strides[TENSOR##_i]); |
| 471 | +//printf("address %ld\n", TENSOR##_data-TENSOR->storage->data+TENSOR->storageOffset); |
463 | 472 |
|
464 | 473 | #endif |
0 commit comments