@@ -460,25 +460,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
       clks->lod().size() ? clks->lod()[0].size() - 1 : clks->dims()[0];
   CHECK(clk_size == batch_size || clk_size == 1);
 
-  std::vector<float> g;
-  for (framework::LoDTensor* g_tensor : *outputs) {
-    float* g_ori = g_tensor->data<float>();
-    // no cvm
-    if (batch_size_consist) {  // TODO(zhaocaibei123): add config
-      // scale_sparse_gradient_with_batch_size_
-      Eigen::Map<
-          Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-          g_mat(g_ori, g_tensor->numel() / fea_dim, fea_dim);
-      g_mat.rightCols(fea_dim) *= batch_size;
-    }
-
-    size_t origin = g.size();
-    size_t add = g_tensor->numel();
-    g.resize(origin + add);
-
-    memcpy(g.data() + origin, g_tensor->data<float>(), add * sizeof(float));
-  }
-
+  CHECK(outputs->size() == inputs->size());
   std::vector<uint64_t> push_keys;
   push_keys.reserve(MAX_FEASIGN_NUM / 100);
   std::vector<std::vector<float>> push_values;
@@ -495,9 +477,21 @@ void FleetWrapper::PushSparseFromTensorAsync(
   const int64_t* clk_tensor = clks->data<int64_t>();
 
   for (size_t index = 0; index < inputs->size(); ++index) {
+    framework::LoDTensor* g_tensor = outputs->at(index);
+    float* g = g_tensor->data<float>();
+    // no cvm
+    if (batch_size_consist) {  // TODO(zhaocaibei123): add config
+      // scale_sparse_gradient_with_batch_size_
+      Eigen::Map<
+          Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+          g_mat(g, g_tensor->numel() / fea_dim, fea_dim);
+      g_mat.rightCols(fea_dim) *= batch_size;
+    }
+
     const framework::LoDTensor* tensor = inputs->at(index);
     const int64_t* ids = tensor->data<int64_t>();
     size_t len = tensor->numel();
+    output_len = 0;
 
     if (tensor->lod().size() > 0) {
       for (size_t i = 0; i < tensor->lod()[0].size() - 1; ++i) {
@@ -519,7 +513,7 @@ void FleetWrapper::PushSparseFromTensorAsync(
 
           float* data = push_values.back().data() + 3;
 
-          memcpy(data, g.data() + output_len, sizeof(float) * fea_dim);
+          memcpy(data, g + output_len, sizeof(float) * fea_dim);
 
           ++input_idx;
         }
@@ -542,14 +536,13 @@ void FleetWrapper::PushSparseFromTensorAsync(
 
         float* data = push_values.back().data() + 3;
 
-        memcpy(data, g.data() + output_len, sizeof(float) * fea_dim);
+        memcpy(data, g + output_len, sizeof(float) * fea_dim);
 
         ++input_idx;
       }
     }
+    CHECK(output_len == g_tensor->numel());
   }
-  VLOG(1) << "output_len: " << output_len << " g.size(): " << g.size();
-  CHECK(output_len == g.size());
 
   std::vector<float*> push_g_vec(input_idx, nullptr);
 
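For readers skimming the diff: the change drops the up-front pass that concatenated every output gradient tensor into a single std::vector<float> g, and instead scales and copies each tensor inside the main loop over inputs, with output_len reset to zero per tensor and a per-tensor CHECK(output_len == g_tensor->numel()). Below is a minimal self-contained sketch of that pattern, not the Paddle implementation: GradTensor and PushPerTensor are hypothetical stand-ins for framework::LoDTensor and the real method, a plain loop replaces the Eigen map, and the slot/show/clk header the real code writes before each memcpy is omitted.

#include <cassert>
#include <cstring>
#include <vector>

// Hypothetical stand-in for framework::LoDTensor: a flat gradient buffer
// whose length is a multiple of the embedding width fea_dim.
struct GradTensor {
  std::vector<float> data;
};

// Per-tensor push, mirroring the pattern after this change: each gradient
// tensor is scaled and copied in the same iteration that walks its matching
// input, and the running offset restarts at zero for every tensor.
void PushPerTensor(std::vector<GradTensor>& outputs, size_t fea_dim,
                   float batch_size,
                   std::vector<std::vector<float>>& push_values) {
  for (GradTensor& g_tensor : outputs) {
    float* g = g_tensor.data.data();
    const size_t numel = g_tensor.data.size();

    // Stand-in for g_mat.rightCols(fea_dim) *= batch_size; the real code
    // only does this when batch_size_consist is set.
    for (size_t i = 0; i < numel; ++i) g[i] *= batch_size;

    size_t output_len = 0;  // reset per tensor, as in the new code
    for (; output_len < numel; output_len += fea_dim) {
      push_values.emplace_back(fea_dim);
      // Copy straight out of the tensor's own buffer; there is no longer an
      // intermediate vector concatenating every output.
      std::memcpy(push_values.back().data(), g + output_len,
                  sizeof(float) * fea_dim);
    }
    // Mirrors CHECK(output_len == g_tensor->numel()) in the patched code.
    assert(output_len == numel);
  }
}

Besides saving the allocation and extra copy of the concatenated buffer, the per-tensor check localizes a size mismatch to the offending input/output pair instead of reporting one aggregate mismatch after all tensors have been processed.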