From 8feea52d22ae3a5ce43ea89f2936e06ddb516b20 Mon Sep 17 00:00:00 2001 From: Nickolay Shmyrev Date: Wed, 10 May 2017 18:07:03 +0200 Subject: [PATCH 1/2] Update to tensorflow 1.0.0 --- README.md | 2 +- models/dual_encoder.py | 13 +++++++------ requirements.txt | 3 +-- udc_hparams.py | 4 ++-- udc_inputs.py | 8 -------- udc_model.py | 22 +++++++++++----------- udc_predict.py | 2 +- udc_test.py | 2 +- udc_train.py | 2 ++ 9 files changed, 26 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 8b6ec59..01a3d97 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ The code here implements the Dual LSTM Encoder model from [The Ubuntu Dialogue C #### Setup -This code uses Python 3 and Tensorflow >= 0.9. Clone the repository and install all required packages: +This code uses Python and Tensorflow == 1.0.0. Clone the repository and install all required packages: ``` pip install -U pip diff --git a/models/dual_encoder.py b/models/dual_encoder.py index e0e94a6..7deb51c 100644 --- a/models/dual_encoder.py +++ b/models/dual_encoder.py @@ -42,7 +42,8 @@ def dual_encoder_model( # Build the RNN with tf.variable_scope("rnn") as vs: # We use an LSTM Cell - cell = tf.nn.rnn_cell.LSTMCell( + + cell = tf.contrib.rnn.LSTMCell( hparams.rnn_dim, forget_bias=2.0, use_peepholes=True, @@ -51,10 +52,10 @@ def dual_encoder_model( # Run the utterance and context through the RNN rnn_outputs, rnn_states = tf.nn.dynamic_rnn( cell, - tf.concat(0, [context_embedded, utterance_embedded]), - sequence_length=tf.concat(0, [context_len, utterance_len]), + tf.concat([context_embedded, utterance_embedded], 0), + sequence_length=tf.concat([context_len, utterance_len], 0), dtype=tf.float32) - encoding_context, encoding_utterance = tf.split(0, 2, rnn_states.h) + encoding_context, encoding_utterance = tf.split(rnn_states.h, 2, 0) with tf.variable_scope("prediction") as vs: M = tf.get_variable("M", @@ -68,7 +69,7 @@ def dual_encoder_model( # Dot product between generated response and actual 
response # (c * M) * r - logits = tf.batch_matmul(generated_response, encoding_utterance, True) + logits = tf.matmul(generated_response, encoding_utterance, True) logits = tf.squeeze(logits, [2]) # Apply sigmoid to convert logits to probabilities @@ -78,7 +79,7 @@ def dual_encoder_model( return probs, None # Calculate the binary cross-entropy loss - losses = tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.to_float(targets)) + losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.to_float(targets), logits=logits) # Mean loss across the batch of examples mean_loss = tf.reduce_mean(losses, name="mean_loss") diff --git a/requirements.txt b/requirements.txt index 8c33e68..8987690 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,6 @@ jupyter-client==4.3.0 jupyter-console==4.1.1 jupyter-core==4.1.0 MarkupSafe==0.23 -matplotlib==1.5.1 mistune==0.7.2 nbconvert==4.2.0 nbformat==4.0.1 @@ -36,7 +35,7 @@ scikit-learn==0.17.1 scipy==0.17.1 simplegeneric==0.8.1 six==1.10.0 -tensorflow==0.9.0 +tensorflow==1.0.0 terminado==0.6 tornado==4.3 traitlets==4.2.1 diff --git a/udc_hparams.py b/udc_hparams.py index c5747ae..083df37 100644 --- a/udc_hparams.py +++ b/udc_hparams.py @@ -9,7 +9,7 @@ # Model Parameters tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of the embeddings") -tf.flags.DEFINE_integer("rnn_dim", 256, "Dimensionality of the RNN cell") +tf.flags.DEFINE_integer("rnn_dim", 128, "Dimensionality of the RNN cell") tf.flags.DEFINE_integer("max_context_len", 160, "Truncate contexts to this length") tf.flags.DEFINE_integer("max_utterance_len", 80, "Truncate utterance to this length") @@ -20,7 +20,7 @@ # Training Parameters tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate") tf.flags.DEFINE_integer("batch_size", 128, "Batch size during training") -tf.flags.DEFINE_integer("eval_batch_size", 16, "Batch size during evaluation") +tf.flags.DEFINE_integer("eval_batch_size", 8, "Batch size during evaluation") 
tf.flags.DEFINE_string("optimizer", "Adam", "Optimizer Name (Adam, Adagrad, etc)") FLAGS = tf.flags.FLAGS diff --git a/udc_inputs.py b/udc_inputs.py index 071becc..92a794d 100644 --- a/udc_inputs.py +++ b/udc_inputs.py @@ -45,14 +45,6 @@ def input_fn(): queue_capacity=200000 + batch_size * 10, name="read_batch_features_{}".format(mode)) - # This is an ugly hack because of a current bug in tf.learn - # During evaluation TF tries to restore the epoch variable which isn't defined during training - # So we define the variable manually here - if mode == tf.contrib.learn.ModeKeys.TRAIN: - tf.get_variable( - "read_batch_features_eval/file_name_queue/limit_epochs/epochs", - initializer=tf.constant(0, dtype=tf.int64)) - if mode == tf.contrib.learn.ModeKeys.TRAIN: target = feature_map.pop("label") else: diff --git a/udc_model.py b/udc_model.py index 1ad0739..7ef3d39 100644 --- a/udc_model.py +++ b/udc_model.py @@ -76,20 +76,20 @@ def model_fn(features, targets, mode): probs, loss = model_impl( hparams, mode, - tf.concat(0, all_contexts), - tf.concat(0, all_context_lens), - tf.concat(0, all_utterances), - tf.concat(0, all_utterance_lens), - tf.concat(0, all_targets)) + tf.concat(all_contexts, 0), + tf.concat(all_context_lens, 0), + tf.concat(all_utterances, 0), + tf.concat(all_utterance_lens, 0), + tf.concat(all_targets, 0)) - split_probs = tf.split(0, 10, probs) - shaped_probs = tf.concat(1, split_probs) + split_probs = tf.split(probs, 10, 0) + shaped_probs = tf.concat(split_probs, 1) # Add summaries - tf.histogram_summary("eval_correct_probs_hist", split_probs[0]) - tf.scalar_summary("eval_correct_probs_average", tf.reduce_mean(split_probs[0])) - tf.histogram_summary("eval_incorrect_probs_hist", split_probs[1]) - tf.scalar_summary("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1])) + tf.summary.histogram("eval_correct_probs_hist", split_probs[0]) + tf.summary.scalar("eval_correct_probs_average", tf.reduce_mean(split_probs[0])) + 
tf.summary.histogram("eval_incorrect_probs_hist", split_probs[1]) + tf.summary.scalar("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1])) return shaped_probs, loss, None diff --git a/udc_predict.py b/udc_predict.py index c3850a0..346842c 100644 --- a/udc_predict.py +++ b/udc_predict.py @@ -55,4 +55,4 @@ def get_features(context, utterance): print("Context: {}".format(INPUT_CONTEXT)) for r in POTENTIAL_RESPONSES: prob = estimator.predict(input_fn=lambda: get_features(INPUT_CONTEXT, r)) - print("{}: {:g}".format(r, prob[0,0])) \ No newline at end of file + print("{}: {:g}".format(r, prob[0,0])) diff --git a/udc_test.py b/udc_test.py index a23cd50..1844b97 100644 --- a/udc_test.py +++ b/udc_test.py @@ -12,7 +12,7 @@ tf.flags.DEFINE_string("test_file", "./data/test.tfrecords", "Path of test data in TFRecords format") tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from") tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level") -tf.flags.DEFINE_integer("test_batch_size", 16, "Batch size for testing") +tf.flags.DEFINE_integer("test_batch_size", 8, "Batch size for testing") FLAGS = tf.flags.FLAGS if not FLAGS.model_dir: diff --git a/udc_train.py b/udc_train.py index 7ac81c3..8c11c38 100755 --- a/udc_train.py +++ b/udc_train.py @@ -1,3 +1,5 @@ +#!/usr/bin/python + import os import time import itertools From 823df9eeb0662757d7df3190395e392e80948199 Mon Sep 17 00:00:00 2001 From: Nickolay Shmyrev Date: Thu, 11 May 2017 16:44:34 +0200 Subject: [PATCH 2/2] Fixes for predictor --- udc_model.py | 4 +++- udc_predict.py | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/udc_model.py b/udc_model.py index 7ef3d39..914733e 100644 --- a/udc_model.py +++ b/udc_model.py @@ -26,7 +26,8 @@ def model_fn(features, targets, mode): utterance, utterance_len = get_id_feature( features, "utterance", "utterance_len", hparams.max_utterance_len) - batch_size = targets.get_shape().as_list()[0] + if targets != 
None: + batch_size = targets.get_shape().as_list()[0] if mode == tf.contrib.learn.ModeKeys.TRAIN: probs, loss = model_impl( @@ -53,6 +54,7 @@ def model_fn(features, targets, mode): if mode == tf.contrib.learn.ModeKeys.EVAL: + # We have 10 exampels per record, so we accumulate them all_contexts = [context] all_context_lens = [context_len] diff --git a/udc_predict.py b/udc_predict.py index 346842c..0689ddc 100644 --- a/udc_predict.py +++ b/udc_predict.py @@ -13,6 +13,7 @@ tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from") tf.flags.DEFINE_string("vocab_processor_file", "./data/vocab_processor.bin", "Saved vocabulary processor file") +tf.flags.DEFINE_string("vocab", "./data/vocabulary.txt", "Vocabulary file") FLAGS = tf.flags.FLAGS if not FLAGS.model_dir: @@ -22,13 +23,18 @@ def tokenizer_fn(iterator): return (x.split(" ") for x in iterator) -# Load vocabulary -vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore( - FLAGS.vocab_processor_file) +# Create vocabulary ourselves or load saved one +if not FLAGS.vocab_processor_file: + vp = tf.contrib.learn.preprocessing.VocabularyProcessor(100000) + vp.fit(open(FLAGS.vocab)) # fit on the --vocab file; vocab_processor_file is empty in this branch + vp.save('./data/vocab_processor.bin') +else: + vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore( + FLAGS.vocab_processor_file) # Load your own data here -INPUT_CONTEXT = "Example context" -POTENTIAL_RESPONSES = ["Response 1", "Response 2"] +INPUT_CONTEXT = "hi" +POTENTIAL_RESPONSES = ["hello", "goodbye", "maybe"] def get_features(context, utterance): context_matrix = np.array(list(vp.transform([context]))) @@ -48,11 +54,7 @@ def get_features(context, utterance): model_fn = udc_model.create_model_fn(hparams, model_impl=dual_encoder_model) estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir=FLAGS.model_dir) - # Ugly hack, seems to be a bug in Tensorflow - # estimator.predict doesn't work without this line - estimator._targets_info = 
tf.contrib.learn.estimators.tensor_signature.TensorSignature(tf.constant(0, shape=[1,1])) - print("Context: {}".format(INPUT_CONTEXT)) for r in POTENTIAL_RESPONSES: prob = estimator.predict(input_fn=lambda: get_features(INPUT_CONTEXT, r)) - print("{}: {:g}".format(r, prob[0,0])) + print("{}: {}".format(r, next(prob)[0])) # builtin next() works on both Python 2 and 3 iterators