JiayiFeng
diff --git a/‎doc/api/v2/config/layer.rst‎
Lines changed: 11 additions & 4 deletions b/‎doc/api/v2/config/layer.rst‎
Lines changed: 11 additions & 4 deletions
diff --git a/‎paddle/gserver/layers/FactorizationMachineLayer.cpp‎
Lines changed: 158 additions & 0 deletions b/‎paddle/gserver/layers/FactorizationMachineLayer.cpp‎
Lines changed: 158 additions & 0 deletions
diff --git a/‎paddle/gserver/layers/FactorizationMachineLayer.h‎
Lines changed: 80 additions & 0 deletions b/‎paddle/gserver/layers/FactorizationMachineLayer.h‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎paddle/gserver/tests/test_LayerGrad.cpp‎
Lines changed: 19 additions & 0 deletions b/‎paddle/gserver/tests/test_LayerGrad.cpp‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎paddle/math/CpuSparseMatrix.cpp‎
Lines changed: 29 additions & 0 deletions b/‎paddle/math/CpuSparseMatrix.cpp‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎paddle/math/CpuSparseMatrix.h‎
Lines changed: 9 additions & 0 deletions b/‎paddle/math/CpuSparseMatrix.h‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎proto/ModelConfig.proto‎
Lines changed: 3 additions & 0 deletions b/‎proto/ModelConfig.proto‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎python/paddle/trainer/config_parser.py‎
Lines changed: 15 additions & 0 deletions b/‎python/paddle/trainer/config_parser.py‎
Lines changed: 15 additions & 0 deletions
@@ -54,7 +54,7 @@ img_conv
 
 .. _api_v2.layer_context_projection:
 
-context_projection 
+context_projection
 ------------------
 .. autoclass:: paddle.v2.layer.context_projection
  :noindex:
@@ -70,7 +70,7 @@ Image Pooling Layer
 img_pool
 --------
 .. autoclass:: paddle.v2.layer.img_pool
- :noindex: 
+ :noindex:
 
 spp
 ---
@@ -104,7 +104,7 @@ sum_to_one_norm
 ---------------
 .. autoclass:: paddle.v2.layer.sum_to_one_norm
  :noindex:
- 
+
 cross_channel_norm
 ------------------
 .. autoclass:: paddle.v2.layer.cross_channel_norm
@@ -114,7 +114,7 @@ row_l2_norm
 -----------
 .. autoclass:: paddle.v2.layer.row_l2_norm
  :noindex:
- 
+
 Recurrent Layers
 ================
 
@@ -415,6 +415,13 @@ multiplex
 .. autoclass:: paddle.v2.layer.multiplex
  :noindex:
 
+Factorization Machine Layer
+============================
+
+factorization_machine
+---------------------
+.. autoclass:: paddle.v2.layer.factorization_machine
+ :noindex:
 
 Slicing and Joining Layers
 ==========================
 
@@ -0,0 +1,158 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "FactorizationMachineLayer.h"
+#include <algorithm>
+#include <vector>
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+REGISTER_LAYER(factorization_machine, FactorizationMachineLayer);
+
+/**
+ * @brief Set up the layer from its configuration.
+ *
+ * Reads factor_size from the layer config and wraps the parameter of the
+ * single input as an (inputSize x factorSize_) weight that holds the latent
+ * vectors.
+ *
+ * @param layerMap      forwarded to Layer::init
+ * @param parameterMap  forwarded to Layer::init
+ * @return false if the base-class initialization reports failure,
+ *         true otherwise
+ */
+bool FactorizationMachineLayer::init(const LayerMap& layerMap,
+                                     const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class; do not continue on a failed init */
+  if (!Layer::init(layerMap, parameterMap)) {
+    return false;
+  }
+
+  factorSize_ = config_.factor_size();
+
+  /* The layer takes exactly one input */
+  CHECK_EQ(inputLayers_.size(), 1UL);
+  const size_t inputSize = inputLayers_[0]->getSize();
+
+  /* initialize the latentVectors_: one row of factorSize_ entries per input
+   * dimension, so the parameter must hold inputSize * factorSize_ values */
+  CHECK_EQ(parameters_[0]->getSize(), inputSize * factorSize_);
+  latentVectors_ = std::unique_ptr<Weight>(
+      new Weight(inputSize, factorSize_, parameters_[0]));
+
+  return true;
+}
+
+/**
+ * Forward pass of the factorization machine layer.
+ *
+ * The output is assembled from two row sums:
+ *   - the squared product of the input and the latent vectors, weighted 0.5;
+ *   - the product of the squared input and the squared latent vectors,
+ *     weighted -0.5.
+ * The activation is applied at the end. inputMulFactor_, inputSquare_ and
+ * latentVectorsSquare_ are left filled for backward().
+ */
+void FactorizationMachineLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  const MatrixPtr& in = getInputValue(0);
+  const size_t numSamples = in->getHeight();
+  const size_t numFeatures = inputLayers_[0]->getSize();
+
+  reserveOutput(numSamples, getSize());
+  MatrixPtr out = getOutputValue();
+
+  /* Size the scratch matrices for the current batch */
+  Matrix::resizeOrCreate(
+      latentVectorsSquare_, numFeatures, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(
+      inputMulFactor_, numSamples, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(tmpOut_, numSamples, factorSize_, false, useGpu_);
+
+  REGISTER_TIMER_INFO("FmInputMulFactorTimer", getName().c_str());
+
+  /* First term: row sums of (input * latent vectors) squared */
+  const MatrixPtr& latent = latentVectors_->getW();
+  inputMulFactor_->mul(*in, *latent);
+  inputMulFactor_->square2(*tmpOut_);
+  out->sumRows(*tmpOut_, 0.5, 0);
+
+  /* Square the input; a CPU sparse input keeps a sparse square */
+  const bool sparseInput = dynamic_cast<CpuSparseMatrix*>(in.get()) != nullptr;
+  if (!sparseInput) {
+    Matrix::resizeOrCreate(
+        inputSquare_, in->getHeight(), in->getWidth(), false, useGpu_);
+    in->square2(*inputSquare_);
+  } else {
+    Matrix::resizeOrCreateSparseMatrix(inputSquare_,
+                                       in->getHeight(),
+                                       in->getWidth(),
+                                       in->getElementCnt(),
+                                       in->getValueType());
+    inputSquare_->copyFrom(*in);
+    (dynamic_cast<CpuSparseMatrix*>(inputSquare_.get()))->square2();
+  }
+
+  /* Second term: row sums of (squared input * squared latent vectors) */
+  latent->square2(*latentVectorsSquare_);
+  tmpOut_->mul(*inputSquare_, *latentVectorsSquare_);
+  out->sumRows(*tmpOut_, -0.5, 1.0);
+
+  /* activation */ {
+    REGISTER_TIMER_INFO("FmFwAtvTimer", getName().c_str());
+    forwardActivation();
+  }
+}
+
+void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
+ /* Backward pass. Reads inputMulFactor_, inputSquare_ and
+  * latentVectorsSquare_ without recomputing them, so it expects forward() to
+  * have filled them for the same batch.
+  *
+  * Do derivation */ { backwardActivation(); }
+
+ const MatrixPtr& inputV = getInputValue(0);
+ const MatrixPtr& oGrad = getOutputGrad();
+
+ Matrix::resizeOrCreate(
+ tmpSum_, 1, latentVectors_->getW()->getHeight(), false, useGpu_);
+ MatrixPtr tmpSumTrans = Matrix::create(tmpSum_->getRowBuf(0),
+ latentVectors_->getW()->getHeight(),
+ 1,
+ false,
+ useGpu_); /* built on tmpSum_'s row buffer, so presumably a
+  * (height x 1) view of the same memory rather than a copy; the code below
+  * writes through one name and reads through the other (TODO confirm) */
+
+ /* Calculate the gradients of the latentVectors_ matrix.
+  * Both branches (CPU sparse input / everything else) do the same steps:
+  *   1. tmpInput_ = input with each row scaled by column 0 of oGrad; its
+  *      transpose times inputMulFactor_ goes into the weight gradient;
+  *   2. tmpInput_ = squared input with each row scaled by column 0 of
+  *      oGrad, reduced over the batch with a factor of -1 into tmpSum_;
+  *   3. the latent vectors, row-scaled through tmpSumTrans, go into the
+  *      weight gradient.
+  * NOTE(review): the trailing (1, 1) and (-1, 0) arguments are read here as
+  * (scale of the new term, scale of the existing destination), i.e.
+  * accumulate vs. overwrite; confirm against the Matrix API. */
+ if (latentVectors_->getWGrad()) {
+ if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
+ Matrix::resizeOrCreateSparseMatrix(tmpInput_,
+ inputV->getHeight(),
+ inputV->getWidth(),
+ inputV->getElementCnt());  // NOTE(review): unlike forward(), no value type is passed here
+
+ CpuSparseMatrix* sparseInputV =
+ dynamic_cast<CpuSparseMatrix*>(inputV.get());
+ CpuSparseMatrix* sparseInputSquare =
+ dynamic_cast<CpuSparseMatrix*>(inputSquare_.get());
+ CpuSparseMatrix* sparseTmpInput =
+ dynamic_cast<CpuSparseMatrix*>(tmpInput_.get());
+ sparseTmpInput->copyFrom(*sparseInputV);  // gives tmpInput_ the same sparsity layout as the input
+
+ sparseTmpInput->rowScale(0, *sparseInputV, *oGrad);  // step 1
+ latentVectors_->getWGrad()->mul(
+ *sparseTmpInput->getTranspose(), *inputMulFactor_, 1, 1);
+ sparseTmpInput->rowScale(0, *sparseInputSquare, *oGrad);  // step 2
+
+ Matrix::resizeOrCreate(negOnes_, 1, inputV->getHeight(), false, useGpu_);
+ negOnes_->zeroMem();
+ negOnes_->add(-1);  // negOnes_ is now a 1 x batch row of -1
+ tmpSum_->mul(*negOnes_, *sparseTmpInput, 1, 0);  // presumably the sparse counterpart of sumCols(*tmpInput_, -1, 0) below
+ } else {
+ Matrix::resizeOrCreate(
+ tmpInput_, inputV->getHeight(), inputV->getWidth(), false, useGpu_);
+
+ tmpInput_->rowScale(0, *inputV, *oGrad);  // step 1
+ latentVectors_->getWGrad()->mul(
+ *tmpInput_->getTranspose(), *inputMulFactor_, 1, 1);
+ tmpInput_->rowScale(0, *inputSquare_, *oGrad);  // step 2
+
+ tmpSum_->sumCols(*tmpInput_, -1, 0);
+ }
+
+ latentVectors_->getWGrad()->addRowScale(
+ 0, *latentVectors_->getW(), *tmpSumTrans);  // step 3
+
+ /* Increase the gradient update count of the parameter */
+ latentVectors_->getParameterPtr()->incUpdate(callback);
+ }
+
+ /* Calculate the input layers gradient (skipped when the input has no
+  * gradient matrix).
+  * NOTE(review): the final rowScale multiplies every element of inGrad by
+  * the output gradient, including whatever inGrad held before this call;
+  * confirm inGrad cannot already carry gradient from another layer. */
+ MatrixPtr inGrad = getInputGrad(0);
+ if (inGrad != NULL) {
+ inGrad->mul(
+ *inputMulFactor_, *latentVectors_->getW()->getTranspose(), 1, 1);
+ tmpSumTrans->sumRows(*latentVectorsSquare_, -1, 0);  // refills the scratch that tmpSum_ is read from on the next line
+ inGrad->addColScale(0, *inputV, *tmpSum_);
+ inGrad->rowScale(0, *inGrad, *oGrad);
+ }
+}
+
+} // namespace paddle
@@ -0,0 +1,80 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/utils/ThreadLocal.h"
+
+namespace paddle {
+/**
+ * @brief The Factorization Machine models pairwise (order-2) feature
+ * interactions as inner product of the learned latent vectors corresponding
+ * to each input feature.
+ *
+ * The Factorization Machine can effectively capture feature interactions
+ * especially when the input is sparse. While in principle FM can model higher
+ * order feature interaction, in practice usually only order-2 feature
+ * interactions are considered. The Factorization Machine Layer here only
+ * computes the order-2 interactions with the formula:
+ *
+ * \f[
+ * y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
+ * \f]
+ *
+ * The detailed calculation for forward and backward can be found in this paper:
+ *
+ * Steffen Rendle. Factorization Machines. ICDM 2010.
+ *
+ * The config file api is factorization_machine.
+ */
+
+class FactorizationMachineLayer : public Layer {
+protected:
+ // The latent vectors, shape: (input size, factorSize_)
+ // Each row of the latentVectors_ matrix is the latent vector
+ // corresponding to one input feature dimension
+ std::unique_ptr<Weight> latentVectors_;
+ // The hyperparameter that defines the dimensionality of the factorization
+ size_t factorSize_;  // read from the layer config's factor_size in init()
+
+private:
+ // Store the square values of the latent vectors matrix
+ MatrixPtr latentVectorsSquare_;
+ // Store the square values of input matrix (sparse for a CPU sparse input)
+ MatrixPtr inputSquare_;
+ // The result of input matrix * latent vector matrix that will be used in
+ // both forward and backward step
+ MatrixPtr inputMulFactor_;
+ // Scratch matrices for temporary calculation results
+ MatrixPtr tmpOut_;    // forward: (batch size, factorSize_)
+ MatrixPtr tmpSum_;    // backward: (1, height of the latent vectors matrix)
+ MatrixPtr tmpInput_;  // backward: same height and width as the input
+ // Row vector (1, batch size) filled with -1; used for sparse input in backward
+ MatrixPtr negOnes_;
+
+public:
+ explicit FactorizationMachineLayer(const LayerConfig& config)
+ : Layer(config) {}
+ ~FactorizationMachineLayer() {}
+
+ bool init(const LayerMap& layerMap,
+ const ParameterMap& parameterMap) override;  // reads factor_size and creates latentVectors_
+
+ void forward(PassType passType) override;  // fills the output value and the cached matrices above
+ void backward(const UpdateCallback& callback = nullptr) override;  // uses the matrices cached by forward()
+};
+
+} // namespace paddle
@@ -2464,6 +2464,25 @@ TEST(Layer, L2DistanceLayer) {
  }
 }
 
+/**
+ * Runs the layer gradient check on a factorization_machine layer that is fed
+ * by one input of the given type.
+ */
+void testFactorizationMachineLayer(InputType type, bool useGpu) {
+  const int kFactorSize = 10;
+  const int kInputDim = 128;
+  const int kBatchSize = 16;
+  // One latent vector of kFactorSize entries per input dimension
+  const int kParamSize = kInputDim * kFactorSize;
+
+  TestConfig config;
+  config.biasSize = 0;
+  config.layerConfig.set_type("factorization_machine");
+  config.layerConfig.set_size(1);
+  config.layerConfig.set_factor_size(kFactorSize);
+  config.layerConfig.add_inputs();
+  config.inputDefs.push_back({type, "layer_0", kInputDim, kParamSize});
+
+  testLayerGrad(
+      config, "factorization_machine", kBatchSize, false, useGpu, false);
+}
+
+// Gradient check for the factorization machine layer: dense input without and
+// with GPU, then sparse float input without GPU.
+TEST(Layer, FactorizationMachineLayer) {
+  testFactorizationMachineLayer(INPUT_DATA, /* useGpu= */ false);
+  testFactorizationMachineLayer(INPUT_DATA, /* useGpu= */ true);
+  testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA,
+                                /* useGpu= */ false);
+}
+
 int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
 
@@ -260,6 +260,35 @@ void CpuSparseMatrix::printOneRow(std::ostream& os, size_t idx) const {
  os << ";";
 }
 
+/**
+ * @brief this_row = b_row * c_row[cCol]
+ *
+ * Overwrites the stored values of this matrix. For every row i, each stored
+ * element becomes the matching stored element of b multiplied by
+ * c(i, cCol). If b has value type NO_VALUE, the element is set to c(i, cCol)
+ * itself. Any other value type of b leaves this matrix untouched.
+ *
+ * CSC format is not supported. b must have the same height, width and
+ * per-row element ranges as this matrix, and c must provide column cCol for
+ * every row of this matrix.
+ *
+ * @param[in] cCol the column of matrix c used to scale each row of b
+ * @param[in] b    sparse matrix supplying the values to scale
+ * @param[in] c    matrix holding one scale factor per row in column cCol
+ */
+void CpuSparseMatrix::rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c) {
+  CHECK(getFormat() != SPARSE_CSC) << "Not supported";
+  CHECK_EQ(height_, b.getHeight());
+  CHECK_EQ(width_, b.getWidth());
+  /* c is indexed as (row of this, cCol): make an out-of-range access fail
+   * loudly instead of reading past the matrix */
+  CHECK_LE(height_, c.getHeight());
+  CHECK_LT(cCol, c.getWidth());
+
+  const bool bHasValues = (b.getValueType() == FLOAT_VALUE);
+  if (!bHasValues && b.getValueType() != NO_VALUE) {
+    return;
+  }
+
+  real* A = getValue();
+  const real* B = b.getValue();
+  for (size_t i = 0; i < height_; i++) {
+    const size_t start = getRowStartIdx(i);
+    const size_t end = getRowStartIdx(i + 1);
+    CHECK_EQ(start, b.getRowStartIdx(i));
+    CHECK_EQ(end, b.getRowStartIdx(i + 1));
+
+    /* The scale factor is the same for the whole row: read it once */
+    const real scale = c.getElement(i, cCol);
+    if (bHasValues) {
+      for (size_t j = start; j < end; j++) {
+        A[j] = B[j] * scale;
+      }
+    } else {
+      for (size_t j = start; j < end; j++) {
+        A[j] = scale;
+      }
+    }
+  }
+}
+
 void CpuSparseMatrix::randomizeUniform() {
  CHECK_LE(elementCnt_, height_ * width_);
  if (valueType_ == FLOAT_VALUE) {
 
@@ -239,6 +239,15 @@ class CpuSparseMatrix : public Matrix {
  const unsigned int* cols,
  const real* values);
 
+ /**
+ * @brief this_row = b_row * c_row[cCol]
+ *
+ * Writes into the stored values of this matrix: every stored element of
+ * row i becomes the matching stored element of b times c(i, cCol). When b
+ * has value type NO_VALUE the element is set to c(i, cCol) itself.
+ *
+ * CSC format is not supported, and b must have the same height, width and
+ * per-row element ranges as this matrix (enforced with CHECKs).
+ *
+ * @param[in] cCol the column of matrix c used to scale each row of b
+ * @param[in] b sparse matrix supplying the values to scale
+ * @param[in] c matrix whose column cCol holds one scale factor per row
+ */
+ void rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c);
+
  void randomizeUniform();
 
  void copyFrom(const GpuSparseMatrix& src, hl_stream_t stream);
 
@@ -544,6 +544,9 @@ message LayerConfig {
  // for batch normalization layer
  // The small constant added to the variance to improve numeric stability.
  optional double epsilon = 60 [ default = 0.00001 ];
+
+ // for factorization machine layer
+ optional uint32 factor_size = 61;
 }
 
 message EvaluatorConfig {
 
@@ -3870,6 +3870,21 @@ def __init__(self, name, inputs, value, **xargs):
  image_conf.channels)
 
 
+@config_layer('factorization_machine')
+class FactorizationMachineLayer(LayerBase):
+ def __init__(self, name, inputs, factor_size, **xargs):
+ super(FactorizationMachineLayer, self).__init__(
+ name, 'factorization_machine', size=1, inputs=inputs, **xargs)
+ config_assert(
+ len(self.inputs) == 1,
+ 'factorization machine layer must have one and only one input.')
+ self.config.factor_size = factor_size
+ input_layer = self.get_input_layer(0)
+ psize = input_layer.size * factor_size
+ dims = [input_layer.size, factor_size]
+ self.create_input_parameter(0, psize, dims)
+
+
 # Deprecated, use a new layer specific class instead
 @config_func
 def Layer(name, type, **xargs):
Original file line number	Diff line number	Diff line change
`@@ -544,6 +544,9 @@ message LayerConfig {`
`544`	`544`	`// for batch normalization layer`
`545`	`545`	`// The small constant added to the variance to improve numeric stability.`
`546`	`546`	`optional double epsilon = 60 [ default = 0.00001 ];`
	`547`	`+`
	`548`	`+ // for factorization machine layer`
	`549`	`+ optional uint32 factor_size = 61;`
`547`	`550`	`}`
`548`	`551`
`549`	`552`	`message EvaluatorConfig {`