paddle/operators/linear_chain_crf_op.h (4 changes: 2 additions & 2 deletions)

@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     ll -= std::log(sum);
     // Now ll is equal to -log(Z).
 
-    const int* lbl = label.data<int>();
+    const int64_t* lbl = label.data<int64_t>();
     PADDLE_ENFORCE_LT(
         static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
         "An invalid tag label that exceeds the largest tag number.");
@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
                            Tensor* emission_grad) const {
     const T* w_exps = transition_exps.data<T>();
     const T* x_exps = emission_exps.data<T>();
-    const int* label_value = label.data<int>();
+    const int64_t* label_value = label.data<int64_t>();
     T* beta_value = beta->data<T>();
 
     auto x_dims = emission_exps.dims();
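Both CRF kernels now read the label tensor as `int64_t`, so labels prepared on the Python side must use an int64 dtype to match. A minimal sketch of the constraint (tag count and sequence length are illustrative):

```python
import numpy as np

# Labels for one sequence of length 5 over 10 tags; the int64 dtype is what
# label.data<int64_t>() on the C++ side now expects.
labels = np.random.randint(low=0, high=10, size=(5, 1), dtype="int64")
assert labels.dtype == np.int64
```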
python/paddle/v2/fluid/layer_helper.py (5 changes: 4 additions & 1 deletion)

@@ -126,7 +126,10 @@ def create_parameter(self, attr, shape, dtype, suffix='w',
         self.startup_program.global_block().create_parameter(
             dtype=dtype, shape=shape, **attr_copy)
         return self.main_program.global_block().create_parameter(
-            name=attr_copy['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'],
+            dtype=dtype,
+            shape=shape,
+            trainable=attr_copy.get('trainable', True))
 
     def create_tmp_variable(self, dtype):
         return self.main_program.current_block().create_var(
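Because `trainable` now survives the copy into the main program's parameter, a layer can freeze its weights through `param_attr`. A hedged usage sketch (the embedding layer and surrounding variables are illustrative; the optimizer change below is where the flag takes effect):

```python
# A parameter created with trainable=False keeps its initial (or pre-loaded)
# values; the optimizer skips it when appending optimize ops.
emb = layers.embedding(
    input=word,
    size=[word_dict_len, word_dim],
    param_attr={'name': 'emb', 'trainable': False})
```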
python/paddle/v2/fluid/layers.py (45 changes: 44 additions & 1 deletion)

@@ -112,6 +112,7 @@ def _get_default_bias_initializer():
 def embedding(input,
               size,
               is_sparse=False,
+              param_initializer=None,
               param_attr=None,
               data_type='float32',
               main_program=None,
@@ -136,9 +137,16 @@ def embedding(input,
         to the LayerHelper constructor.
 
     """
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     helper = LayerHelper('embedding', **locals())
     w = helper.create_parameter(
-        attr=helper.param_attr, shape=size, dtype=data_type)
+        attr=helper.param_attr,
+        shape=size,
+        dtype=data_type,
+        initializer=param_initializer or _get_default_param_initializer())
     tmp = helper.create_tmp_variable(data_type)
     helper.append_op(
         type='lookup_table',
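`embedding` now accepts an explicit `param_initializer` and falls back to `XavierInitializer` otherwise. A sketch of overriding the default, assuming `NormalInitializer` is importable from `paddle.v2.fluid.initializer` alongside `XavierInitializer` (sizes are illustrative):

```python
from paddle.v2.fluid.initializer import NormalInitializer

# Initialize the embedding table from N(0, 0.01) instead of Xavier.
emb = layers.embedding(
    input=word,
    size=[vocab_size, emb_dim],
    param_initializer=NormalInitializer(loc=0.0, scale=0.01))
```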
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
     return out
 
 
+def linear_chain_crf(input,
+                     label,
+                     param_attr=None,
+                     param_initializer=None,
+                     main_program=None,
+                     startup_program=None):
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    helper = LayerHelper('linear_chain_crf', **locals())
+    size = input.shape[1]
+    transition = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[size + 2, size],
+        dtype=helper.input_dtype(),
+        initializer=param_initializer or _get_default_param_initializer())
+    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
+    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='linear_chain_crf',
+        inputs={"Emission": [input],
+                "Transition": transition,
+                "Label": label},
+        outputs={
+            "Alpha": [alpha],
+            "EmissionExps": [emission_exps],
+            "TransitionExps": transition_exps,
+            "LogLikelihood": log_likelihood
+        })
+
+    return log_likelihood
+
+
 def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
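The new `linear_chain_crf` layer creates a `[size + 2, size]` transition parameter (per the op's convention, the two extra rows hold the start and end transition weights), appends the `linear_chain_crf` op, and returns only its log-likelihood output. A minimal usage sketch, assuming `feature` is an upstream variable and `num_tags` is the size of the tag set:

```python
emission = layers.fc(input=feature, size=num_tags)
label = layers.data(name='label', shape=[1], data_type='int64')
# Alpha, EmissionExps, and TransitionExps stay internal to the op; the
# returned log-likelihood serves directly as a sequence-level cost.
crf_cost = layers.linear_chain_crf(input=emission, label=label)
avg_cost = layers.mean(x=crf_cost)
```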
python/paddle/v2/fluid/optimizer.py (3 changes: 2 additions & 1 deletion)

@@ -170,7 +170,8 @@ def create_optimization_pass(self,
 
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
-            if param_and_grad[1] is not None:
+            if param_and_grad[0].trainable is True and param_and_grad[
+                    1] is not None:
                 optimize_op = self._append_optimize_op(loss.block,
                                                        param_and_grad)
                 optimize_ops.append(optimize_op)
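The optimization pass now checks the parameter's `trainable` attribute in addition to gradient presence, so frozen parameters never receive an optimize op. A sketch of the effective filter:

```python
# Equivalent filter: only trainable parameters with a gradient are updated;
# everything else keeps its initial or pre-loaded value.
to_update = [(p, g) for p, g in parameters_and_grads
             if p.trainable is True and g is not None]
```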
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py (new file: 192 additions)

@@ -0,0 +1,192 @@
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope
from paddle.v2.fluid.optimizer import SGDOptimizer

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm():
    # 8 features
    word = layers.data(name='word_data', shape=[1], data_type='int64')
    predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
    ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
    ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
    ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
    ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
    ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
    mark = layers.data(name='mark_data', shape=[1], data_type='int64')

    predicate_embedding = layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        data_type='float32',
        is_sparse=IS_SPARSE,
        param_attr={'name': 'vemb'})

    mark_embedding = layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        data_type='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr={'name': embedding_name,
                        'trainable': False}) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = layers.sums(input=hidden_0_layers)

    lstm_0 = layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = layers.sums(input=[
            layers.fc(input=input_tmp[0], size=hidden_dim),
            layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        lstm = layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = layers.sums(input=[
        layers.fc(input=input_tmp[0], size=label_dict_len),
        layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    # define network topology
    feature_out = db_lstm()
    target = layers.data(name='target', shape=[1], data_type='int64')
    crf_cost = layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr={"name": 'crfw',
                    "learning_rate": mix_hidden_lr})
    avg_cost = layers.mean(x=crf_cost)
    # TODO(qiao)
    # 1. add crf_decode_layer and evaluator
    # 2. use other optimizer and check why out will be NAN
    sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
    opts = sgd_optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    embedding_param = g_scope.find_var(embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)

    batch_id = 0
    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place)
            ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place)
            ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place)
            ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place)
            ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place)
            verb_data = to_lodtensor(map(lambda x: x[6], data), place)
            mark_data = to_lodtensor(map(lambda x: x[7], data), place)
            target = to_lodtensor(map(lambda x: x[8], data), place)

            outs = exe.run(framework.default_main_program(),
                           feed={
                               'word_data': word_data,
                               'ctx_n2_data': ctx_n2_data,
                               'ctx_n1_data': ctx_n1_data,
                               'ctx_0_data': ctx_0_data,
                               'ctx_p1_data': ctx_p1_data,
                               'ctx_p2_data': ctx_p2_data,
                               'verb_data': verb_data,
                               'mark_data': mark_data,
                               'target': target
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])

            if batch_id % 10 == 0:
                print("avg_cost=" + str(avg_cost_val))

            # exit early for CI
            exit(0)

            batch_id = batch_id + 1


if __name__ == '__main__':
    main()
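In the test above, `to_lodtensor` flattens a mini-batch of variable-length sequences into one `[total_len, 1]` int64 tensor plus cumulative offsets. A small worked example of the LoD it builds (data values are illustrative):

```python
import numpy as np
import paddle.v2.fluid.core as core

data = [[1, 2, 3], [4, 5]]  # two sequences, lengths 3 and 2
# Cumulative offsets [0, 3, 5]: sequence i spans rows lod[i]:lod[i + 1].
flat = np.array([1, 2, 3, 4, 5], dtype="int64").reshape(5, 1)
tensor = core.LoDTensor()
tensor.set(flat, core.CPUPlace())
tensor.set_lod([[0, 3, 5]])
```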
python/paddle/v2/fluid/tests/test_layers.py (32 changes: 25 additions & 7 deletions)

@@ -1,8 +1,8 @@
+import unittest
+
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
 from paddle.v2.fluid.framework import Program
 import paddle.v2.fluid.core as core
-import unittest
-
 
 class TestBook(unittest.TestCase):
@@ -20,7 +20,8 @@ def test_fit_a_line(self):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
         program.append_backward(avg_cost)
-        print str(program)
+
+        # print str(program)
 
     def test_recognize_digits_mlp(self):
         program = Program()
@@ -49,7 +50,7 @@ def test_recognize_digits_mlp(self):
             input=predict, label=label, main_program=program)
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
-        print str(program)
+        # print str(program)
 
     def test_simple_conv2d(self):
         program = Program()
@@ -64,7 +65,7 @@ def test_recognize_digits_mlp(self):
             filter_size=[4, 4],
             main_program=program)
 
-        print str(program)
+        # print str(program)
 
     def test_recognize_digits_conv(self):
         program = Program()
@@ -103,7 +104,7 @@ def test_recognize_digits_conv(self):
 
         program.append_backward(avg_cost)
 
-        print str(program)
+        # print str(program)
 
     def test_word_embedding(self):
         program = Program()
@@ -164,7 +165,24 @@ def test_word_embedding(self):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
 
-        print str(program)
+        # print str(program)
+
+    def test_linear_chain_crf(self):
+        program = Program()
+
+        # Change g_program, so the rest layers use `g_program`
+        images = layers.data(
+            name='pixel',
+            shape=[784],
+            data_type='float32',
+            main_program=program)
+        label = layers.data(
+            name='label', shape=[1], data_type='int32', main_program=program)
+        hidden = layers.fc(input=images, size=128, main_program=program)
+        crf = layers.linear_chain_crf(
+            input=hidden, label=label, main_program=program)
+
+        # print str(program)
 
 
 if __name__ == '__main__':
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py (2 changes: 1 addition & 1 deletion)

@@ -104,7 +104,7 @@ def set_test_data(self):
         transition_exps = np.exp(transition)
 
         labels = np.random.randint(
-            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
+            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
 
         self.inputs = {
             "Emission": (emission, lod),