
Commit 7c8acd4

Merge pull request #2871 from lcy-seso/print_attention_weight
Enable a layer group to output a sequence inside it during generation.
2 parents: 14f791f + 3bf4400

4 files changed (+146, -64 lines)

paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp

Lines changed: 78 additions & 32 deletions
@@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
                            /* width */ resultNum,
                            false,
                            /* useGpu */ false);
-    Matrix::resizeOrCreate(generator_.outArg.value,
-                           /* height */ maxGenWordCount,
-                           /* width */ 1,
-                           false,
-                           /* useGpu */ false);
   }
   ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
                                 numSequences + 1,
@@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
   } else {
     oneWaySearch(numSequences);
   }
-  if (dataArgsSize_) createDataOutlink(batchMachineIdVec_);
+  if (dataArgsSize_) createDataOutlink();
 
   size_t size = generator_.ids.size();
   generator_.outArg.ids->resize(size);
@@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
   }
 
   batchMachineIdVec_.clear();
+  batchMachineStartPos_.clear();
   int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
   starts[0] = 0;
   generator_.ids.clear();
@@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
     finalPaths_[i].resize(minFinalPathsSize);
   }
 
-  batchMachineIdVec_.clear();
   generator_.ids.clear();
   int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
   starts[0] = 0;
   if (numResults > 1) {
-    real* probs = generator_.outArg.in->getData();
+    int idsProbSaveSize = 0;
+    for (auto inSeq : finalPaths_) {
+      for (auto path : inSeq) idsProbSaveSize += path.ids.size();
+      idsProbSaveSize += inSeq.size();
+    }
+    Matrix::resizeOrCreate(
+        generator_.outArg.value, idsProbSaveSize, 1, false, false);
     real* idsProb = generator_.outArg.value->getData();
+
+    real* probs = generator_.outArg.in->getData();
     size_t curPos = 0;
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
@@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
         curPos += genLen;
         idsProb[curPos++] = -1.0;
         probs[i * numResults + j] = path.logProb;
-
-        if (!j && dataArgsSize_) {
-          // in beam search, here only reserved the top 1 generated result
-          // for out_links that are not the generated word indices.
-          batchMachineIdVec_.insert(batchMachineIdVec_.end(),
-                                    path.machineIdVec.begin(),
-                                    path.machineIdVec.end());
-        }
       }
       starts[i + 1] = generator_.ids.size();
     }
   } else {
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       CHECK(!finalPaths_[i].empty());
-      generator_.ids.insert(generator_.ids.begin(),
-                            finalPaths_[i][0].ids.begin(),
-                            finalPaths_[i][0].ids.end());
-      starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
+      Path& path = finalPaths_[i][0];
+      generator_.ids.insert(
+          generator_.ids.begin(), path.ids.begin(), path.ids.end());
+      starts[i + 1] = starts[i] + path.ids.size();
     }
   }
 }
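Note on the hunk above: generator_.outArg.value is now sized exactly in fillGenOutputs(), with one row per generated id plus one row per path for the -1.0 separator written after each sequence. A minimal standalone sketch of that sizing arithmetic (the path lengths below are illustrative, not taken from the commit):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Stand-in for finalPaths_[i][j].ids.size(): two input sequences,
  // each with two surviving beam-search paths.
  std::vector<std::vector<std::size_t>> pathIdLens = {{4, 3}, {5, 2}};

  int idsProbSaveSize = 0;
  for (const auto& inSeq : pathIdLens) {
    for (std::size_t len : inSeq) idsProbSaveSize += len;  // one slot per id
    idsProbSaveSize += inSeq.size();  // one slot per path for the -1.0 marker
  }
  std::cout << idsProbSaveSize << "\n";  // 14 ids + 4 separators = 18
  return 0;
}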
@@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
   }
 }
 
-void RecurrentGradientMachine::createDataOutlink(
-    std::vector<int>& machineIdVec) {
-  size_t seqNum =
-      getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
-  std::vector<int> starts(seqNum + 1, 0);
-  for (size_t i = 0; i < seqNum; ++i) {
-    size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size()
-                                        : finalPaths_[0][i].ids.size();
-    starts[i + 1] = starts[i] + seqLen;
+void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
+    bool isSeq, std::vector<Argument>& outArgs) {
+  batchMachineIdVec_.clear();
+
+  size_t seqIdx = 0;
+  for (size_t i = 0; i < finalPaths_.size(); ++i) {
+    for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
+      std::vector<int>& machineIdVec = finalPaths_[i][j].machineIdVec;
+      if (isSeq) {
+        for (size_t i = 0; i < machineIdVec.size(); ++i) {
+          size_t rowId = machineIdVec[i];
+          int* seqPos =
+              outArgs[i].sequenceStartPositions->getMutableData(false);
+          batchMachineIdVec_.push_back(seqPos[rowId]);
+        }
+      } else {
+        batchMachineIdVec_.insert(
+            batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
+      }
+      seqIdx++;
+    }
+  }
+}
+
+void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
+    bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
+  size_t totalSeqNum = std::accumulate(
+      finalPaths_.begin(),
+      finalPaths_.end(),
+      0UL,
+      [](size_t a, const std::vector<Path>& b) { return a + b.size(); });
+  copySize.resize(totalSeqNum, 1);
+
+  batchMachineStartPos_.resize(totalSeqNum + 1, 0);
+  if (isSeq) {
+    ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
+    CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
+             getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
+    int* starts = inputSeqStartPos->getMutableData(false);
+    int seqId = 0;
+    for (int i = 0; i < finalPaths_.size(); ++i) {
+      for (int j = 0; j < finalPaths_[i].size(); ++j) {
+        copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
+                                            : starts[j + 1] - starts[j];
+        batchMachineStartPos_[seqId + 1] =
+            batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
+        seqId++;
+      }
+    }
+  } else {
+    for (size_t i = 0; i < finalPaths_[0].size(); ++i)
+      batchMachineStartPos_[i + 1] =
+          batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
   }
+}
 
+void RecurrentGradientMachine::createDataOutlink() {
   for (size_t i = 0; i < dataArgsSize_; i++) {
+    bool isSeq = dataArgsFrame_[i][0].hasSeq();
+    std::vector<int> copySize;
+    createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
+    createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);
+
     dataArgs_[i].concat(dataArgsFrame_[i],
-                        machineIdVec,
-                        starts,
+                        batchMachineIdVec_,
+                        batchMachineStartPos_,
+                        copySize,
                         useGpu_,
                         HPPL_STREAM_1,
                         PASS_TEST);
-
     auto dataAgent =
         dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
     CHECK_NOTNULL(dataAgent);
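Note on createDataOutlinkSelRowsInfo() above: it fills batchMachineIdVec_ in one of two modes. For a sequence outlink, each machineIdVec entry names a sequence inside one time step's Argument, so it is translated into a start row through that Argument's sequenceStartPositions; for a non-sequence outlink, the machine ids are already row indices and are copied as-is. A hedged sketch of the two modes with simplified stand-in types (selectRows, rowIds, and seqStartsPerStep are illustrative names, not PaddlePaddle API):

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for the machineIdVec -> batchMachineIdVec_ mapping.
std::vector<int> selectRows(
    bool isSeq,
    const std::vector<int>& rowIds,  // plays the role of machineIdVec
    const std::vector<std::vector<int>>& seqStartsPerStep) {
  std::vector<int> selected;
  if (isSeq) {
    // Sequence outlink: rowIds[t] indexes a sequence in time step t's output;
    // map it to that sequence's start row via the step's start positions.
    for (std::size_t t = 0; t < rowIds.size(); ++t)
      selected.push_back(seqStartsPerStep[t][rowIds[t]]);
  } else {
    // Non-sequence outlink: the ids are already row indices; copy them as-is.
    selected = rowIds;
  }
  return selected;
}

int main() {
  std::vector<int> rowIds = {1, 0};  // one entry per time step
  std::vector<std::vector<int>> seqStarts = {{0, 3, 5}, {0, 2, 6}};
  for (int r : selectRows(true, rowIds, seqStarts)) std::cout << r << ' ';
  std::cout << '\n';  // prints: 3 0
  return 0;
}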

paddle/gserver/gradientmachines/RecurrentGradientMachine.h

Lines changed: 38 additions & 9 deletions
@@ -190,7 +190,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
     std::vector<int> ids;
 
     /**
-     * @brief idsProb, log probability of each generated words.
+     * @brief idsProb, log probability of each generated word.
      */
     std::vector<real> idsProb;
 
@@ -472,15 +472,43 @@ class RecurrentGradientMachine : public NeuralNetwork {
   void copyDataOutlinkFrame(size_t machineCur);
 
   /*
-   * @brief In generation, if the layer group has more than 1 outlink, outlinks
-   * except the first one are data outlinks. This function creates the data
-   * outlinks.
-   * @note In beam search, only one generated sequence with the hightest log
-   * probabilites are retained.
-   * @param machineIdVec : select a row of output matrix in each frame
-   * that the generation process expanded.
+   * @brief In generation, if the layer group has more than one outlink, every
+   * outlink except the first is a data outlink. In a RecurrentLayerGroup, each
+   * time step is a separate Network whose layer outputs are stored in separate
+   * Arguments. If a layer inside the RecurrentLayerGroup is specified as an
+   * outlink, this function collects its per-time-step outputs for every
+   * generated sequence, which are dispersed across those separate Arguments,
+   * into a single new Argument that becomes the output of the
+   * RecurrentLayerGroup.
    */
-  void createDataOutlink(std::vector<int>& machineIdVec);
+  void createDataOutlink();
+
+  /*
+   * @brief decide how many rows to select, starting from a given position, out
+   * of the Matrix that stores the forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not
+   * @param outArgs: all of the returned Arguments of the forward pass
+   * during the generation process.
+   * @param copySize: the returned result; the number of rows to select, from a
+   * start position, out of the Matrix that stores the forward-pass results.
+   */
+  void createDataOutlinkCopySizeInfo(bool isSeq,
+                                     std::vector<Argument>& outArgs,
+                                     std::vector<int>& copySize);
+
+  /*
+   * @brief decide the index of the start row, for each time step of a
+   * generated sequence, in the Matrix that stores the entire beam-search
+   * batch's forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output by the
+   * RecurrentGradientMachine is a sequence or not
+   * @param outArgs: all of the returned Arguments of the forward pass
+   * during the generation process.
+   */
+  void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
 
   /*
    * @brief used in beam search, connect previous frame to form recurrent link
@@ -543,6 +571,7 @@ class RecurrentGradientMachine : public NeuralNetwork {
   std::vector<int> topIds_;
   std::vector<int> seqIds_;
   std::vector<int> batchMachineIdVec_;
+  std::vector<int> batchMachineStartPos_;
   std::vector<std::vector<Path>> finalPaths_;
   std::vector<real> minFinalPathLogProb_;
   BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;
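Note on the two helpers declared above: together they describe, per generated sequence, where its rows start (batchMachineStartPos_, a prefix sum over sequence lengths) and how many rows to copy per step (copySize). A small sketch of the prefix-sum bookkeeping under assumed, illustrative lengths:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Stand-in for the generated-sequence lengths, finalPaths_[i][j].ids.size().
  std::vector<int> seqLens = {4, 2, 3};

  // Mirror how batchMachineStartPos_ is filled: a prefix sum over the lengths.
  std::vector<int> startPos(seqLens.size() + 1, 0);
  for (std::size_t i = 0; i < seqLens.size(); ++i)
    startPos[i + 1] = startPos[i] + seqLens[i];

  for (std::size_t i = 0; i < seqLens.size(); ++i)
    std::cout << "sequence " << i << " -> rows [" << startPos[i] << ", "
              << startPos[i + 1] << ") of the concatenated outlink\n";
  return 0;
}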

paddle/parameter/Argument.cpp

Lines changed: 29 additions & 23 deletions
@@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
 void Argument::concat(const std::vector<Argument>& args,
                       const std::vector<int>& selectRows,
                       const std::vector<int>& seqStartPos,
+                      const std::vector<int>& copySize,
                       bool useGpu,
                       hl_stream_t stream,
                       PassType passType) {
   CHECK(!subSequenceStartPositions)
       << "undefined behavior for subsequence positions";
 
-  size_t batchSize = selectRows.size();
+  size_t batchSize = 0;
+  for (size_t i = 0; i < copySize.size(); ++i)
+    batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]);
+
   auto copyArg = [batchSize, stream](MatrixPtr& dst,
                                      MatrixPtr src,
-                                     int startRow,
-                                     int pos,
+                                     int desStartRow,
+                                     int srcStartRow,
                                      int size,
                                      bool useGpu) {
     if (!src) {
@@ -300,28 +304,29 @@ void Argument::concat(const std::vector<Argument>& args,
       dst->resize(batchSize, width);
     }
 
-    MatrixPtr tmpMatrix = dst->subMatrix(startRow, size);
-    tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream);
+    MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size);
+    tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream);
   };
 
   auto copyIds = [batchSize, stream](IVectorPtr& dst,
                                      const IVectorPtr& src,
-                                     int startRow,
-                                     int pos,
+                                     int desStartRow,
+                                     int srcStartRow,
                                      int size,
                                      bool useGpu) {
     if (!src) {
       dst.reset();
       return;
     }
     IVector::resizeOrCreate(dst, batchSize, useGpu);
-    dst->subVec(startRow, size)->copyFrom(*src->subVec(pos, size), stream);
+    dst->subVec(desStartRow, size)
+        ->copyFrom(*src->subVec(srcStartRow, size), stream);
   };
 
   auto copyStrs = [batchSize, stream](SVectorPtr& dst,
                                       const SVectorPtr& src,
-                                      int startRow,
-                                      int pos,
+                                      int desStartRow,
+                                      int srcStartRow,
                                       int size,
                                       bool useGpu) {
     if (!src) {
@@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args,
     } else {
       dst->resize(batchSize);
     }
-    std::copy(
-        src->begin() + pos, src->begin() + pos + size, dst->begin() + startRow);
+    std::copy(src->begin() + srcStartRow,
+              src->begin() + srcStartRow + size,
+              dst->begin() + desStartRow);
   };
 
   dataId = args[0].dataId;
   CHECK_NE(seqStartPos.size(), 0UL);
-  size_t sampleNum = seqStartPos.size() - 1;
-  for (size_t i = 0; i < sampleNum; ++i) {
+  int desStartRow = 0;
+  for (size_t i = 0; i < copySize.size(); ++i) {
     int startPos = seqStartPos[i];
     int endPos = seqStartPos[i + 1];
     CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
     for (int j = startPos; j < endPos; ++j) {
       const Argument& arg = args[j - startPos];
-      CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have"
-                                   << " same dataId";
-      const int copySize = 1;
-      const int rowIdx = selectRows[j];
-      copyArg(in, arg.in, j, rowIdx, copySize, useGpu);
-      copyArg(value, arg.value, j, rowIdx, copySize, useGpu);
+      CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
                                   << "the same dataId.";
+      const int srcStartRow = selectRows[j];
+      copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
+      copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
       if (passType != PASS_TEST) {
-        copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu);
+        copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu);
       }
-      copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu);
-      copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu);
+      copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu);
+      copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu);
+      desStartRow += copySize[i];
     }
   }
   ICpuGpuVector::resizeOrCreate(
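Note on the new batch-size computation above: the destination row count is no longer selectRows.size(); sequence i spans seqStartPos[i + 1] - seqStartPos[i] source Arguments and copies copySize[i] rows from each, so the total is a weighted sum. A minimal check of that arithmetic with illustrative values:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> seqStartPos = {0, 3, 5};  // two sequences over 3 and 2 steps
  std::vector<int> copySize = {2, 1};        // rows copied per step

  // Mirror the loop at the top of Argument::concat().
  std::size_t batchSize = 0;
  for (std::size_t i = 0; i < copySize.size(); ++i)
    batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]);

  std::cout << batchSize << "\n";  // 2*3 + 1*2 = 8 destination rows
  return 0;
}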

paddle/parameter/Argument.h

Lines changed: 1 addition & 0 deletions
@@ -240,6 +240,7 @@ struct Argument {
   void concat(const std::vector<Argument>& args,
               const std::vector<int>& selectRows,
               const std::vector<int>& seqStartPos,
+              const std::vector<int>& copySize,
               bool useGpu,
               hl_stream_t stream,
               PassType passType);
