dennybritz · nshmyrev · May 10, 2017 · May 11, 2017
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ The code here implements the Dual LSTM Encoder model from [The Ubuntu Dialogue C
 
 #### Setup
 
-This code uses Python 3 and Tensorflow >= 0.9. Clone the repository and install all required packages:
+This code uses Python and Tensorflow == 1.0.0. Clone the repository and install all required packages:
 
 ```
 pip install -U pip

diff --git a/models/dual_encoder.py b/models/dual_encoder.py
@@ -42,7 +42,8 @@ def dual_encoder_model(
   # Build the RNN
   with tf.variable_scope("rnn") as vs:
     # We use an LSTM Cell
-    cell = tf.nn.rnn_cell.LSTMCell(
+
+    cell = tf.contrib.rnn.LSTMCell(
         hparams.rnn_dim,
         forget_bias=2.0,
         use_peepholes=True,
@@ -51,10 +52,10 @@ def dual_encoder_model(
     # Run the utterance and context through the RNN
     rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
         cell,
-        tf.concat(0, [context_embedded, utterance_embedded]),
-        sequence_length=tf.concat(0, [context_len, utterance_len]),
+        tf.concat([context_embedded, utterance_embedded], 0),
+        sequence_length=tf.concat([context_len, utterance_len], 0),
         dtype=tf.float32)
-    encoding_context, encoding_utterance = tf.split(0, 2, rnn_states.h)
+    encoding_context, encoding_utterance = tf.split(rnn_states.h, 2, 0)
 
   with tf.variable_scope("prediction") as vs:
     M = tf.get_variable("M",
@@ -68,7 +69,7 @@ def dual_encoder_model(
 
     # Dot product between generated response and actual response
     # (c * M) * r
-    logits = tf.batch_matmul(generated_response, encoding_utterance, True)
+    logits = tf.matmul(generated_response, encoding_utterance, True)
     logits = tf.squeeze(logits, [2])
 
     # Apply sigmoid to convert logits to probabilities
@@ -78,7 +79,7 @@ def dual_encoder_model(
       return probs, None
 
     # Calculate the binary cross-entropy loss
-    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.to_float(targets))
+    losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.to_float(targets), logits=logits)
 
   # Mean loss across the batch of examples
   mean_loss = tf.reduce_mean(losses, name="mean_loss")

diff --git a/requirements.txt b/requirements.txt
@@ -15,7 +15,6 @@ jupyter-client==4.3.0
 jupyter-console==4.1.1
 jupyter-core==4.1.0
 MarkupSafe==0.23
-matplotlib==1.5.1
 mistune==0.7.2
 nbconvert==4.2.0
 nbformat==4.0.1
@@ -36,7 +35,7 @@ scikit-learn==0.17.1
 scipy==0.17.1
 simplegeneric==0.8.1
 six==1.10.0
-tensorflow==0.9.0
+tensorflow==1.0.0
 terminado==0.6
 tornado==4.3
 traitlets==4.2.1

diff --git a/udc_hparams.py b/udc_hparams.py
@@ -9,7 +9,7 @@
 
 # Model Parameters
 tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of the embeddings")
-tf.flags.DEFINE_integer("rnn_dim", 256, "Dimensionality of the RNN cell")
+tf.flags.DEFINE_integer("rnn_dim", 128, "Dimensionality of the RNN cell")
 tf.flags.DEFINE_integer("max_context_len", 160, "Truncate contexts to this length")
 tf.flags.DEFINE_integer("max_utterance_len", 80, "Truncate utterance to this length")
 
@@ -20,7 +20,7 @@
 # Training Parameters
 tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate")
 tf.flags.DEFINE_integer("batch_size", 128, "Batch size during training")
-tf.flags.DEFINE_integer("eval_batch_size", 16, "Batch size during evaluation")
+tf.flags.DEFINE_integer("eval_batch_size", 8, "Batch size during evaluation")
 tf.flags.DEFINE_string("optimizer", "Adam", "Optimizer Name (Adam, Adagrad, etc)")
 
 FLAGS = tf.flags.FLAGS

diff --git a/udc_inputs.py b/udc_inputs.py
@@ -45,14 +45,6 @@ def input_fn():
         queue_capacity=200000 + batch_size * 10,
         name="read_batch_features_{}".format(mode))
 
-    # This is an ugly hack because of a current bug in tf.learn
-    # During evaluation TF tries to restore the epoch variable which isn't defined during training
-    # So we define the variable manually here
-    if mode == tf.contrib.learn.ModeKeys.TRAIN:
-      tf.get_variable(
-        "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
-        initializer=tf.constant(0, dtype=tf.int64))
-
     if mode == tf.contrib.learn.ModeKeys.TRAIN:
       target = feature_map.pop("label")
     else:

diff --git a/udc_model.py b/udc_model.py
@@ -26,7 +26,8 @@ def model_fn(features, targets, mode):
     utterance, utterance_len = get_id_feature(
         features, "utterance", "utterance_len", hparams.max_utterance_len)
 
-    batch_size = targets.get_shape().as_list()[0]
+    if targets is not None:  # batch_size is left unset when no targets are supplied
+      batch_size = targets.get_shape().as_list()[0]
 
     if mode == tf.contrib.learn.ModeKeys.TRAIN:
       probs, loss = model_impl(
@@ -53,6 +54,7 @@ def model_fn(features, targets, mode):
 
     if mode == tf.contrib.learn.ModeKeys.EVAL:
 
+
       # We have 10 exampels per record, so we accumulate them
       all_contexts = [context]
       all_context_lens = [context_len]
@@ -76,20 +78,20 @@ def model_fn(features, targets, mode):
       probs, loss = model_impl(
           hparams,
           mode,
-          tf.concat(0, all_contexts),
-          tf.concat(0, all_context_lens),
-          tf.concat(0, all_utterances),
-          tf.concat(0, all_utterance_lens),
-          tf.concat(0, all_targets))
+          tf.concat(all_contexts, 0),
+          tf.concat(all_context_lens, 0),
+          tf.concat(all_utterances, 0),
+          tf.concat(all_utterance_lens, 0),
+          tf.concat(all_targets, 0))
 
-      split_probs = tf.split(0, 10, probs)
-      shaped_probs = tf.concat(1, split_probs)
+      split_probs = tf.split(probs, 10, 0)
+      shaped_probs = tf.concat(split_probs, 1)
 
       # Add summaries
-      tf.histogram_summary("eval_correct_probs_hist", split_probs[0])
-      tf.scalar_summary("eval_correct_probs_average", tf.reduce_mean(split_probs[0]))
-      tf.histogram_summary("eval_incorrect_probs_hist", split_probs[1])
-      tf.scalar_summary("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1]))
+      tf.summary.histogram("eval_correct_probs_hist", split_probs[0])
+      tf.summary.scalar("eval_correct_probs_average", tf.reduce_mean(split_probs[0]))
+      tf.summary.histogram("eval_incorrect_probs_hist", split_probs[1])
+      tf.summary.scalar("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1]))
 
       return shaped_probs, loss, None
 

diff --git a/udc_predict.py b/udc_predict.py
@@ -13,6 +13,7 @@
 
 tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from")
 tf.flags.DEFINE_string("vocab_processor_file", "./data/vocab_processor.bin", "Saved vocabulary processor file")
+tf.flags.DEFINE_string("vocab", "./data/vocabulary.txt", "Vocabulary file")
 FLAGS = tf.flags.FLAGS
 
 if not FLAGS.model_dir:
@@ -22,13 +23,18 @@
 def tokenizer_fn(iterator):
   return (x.split(" ") for x in iterator)
 
-# Load vocabulary
-vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
-  FLAGS.vocab_processor_file)
+# Build the vocabulary from FLAGS.vocab when no saved processor is given, else restore it
+if not FLAGS.vocab_processor_file:
+  vp = tf.contrib.learn.preprocessing.VocabularyProcessor(100000)
+  with open(FLAGS.vocab) as vocab_file:
+    vp.fit(vocab_file)
+  vp.save('./data/vocab_processor.bin')
+else:
+  vp = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(FLAGS.vocab_processor_file)
 
 # Load your own data here
-INPUT_CONTEXT = "Example context"
-POTENTIAL_RESPONSES = ["Response 1", "Response 2"]
+INPUT_CONTEXT = "hi"
+POTENTIAL_RESPONSES = ["hello", "goodbye", "maybe"]
 
 def get_features(context, utterance):
   context_matrix = np.array(list(vp.transform([context])))
@@ -48,11 +54,7 @@ def get_features(context, utterance):
   model_fn = udc_model.create_model_fn(hparams, model_impl=dual_encoder_model)
   estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir=FLAGS.model_dir)
 
-  # Ugly hack, seems to be a bug in Tensorflow
-  # estimator.predict doesn't work without this line
-  estimator._targets_info = tf.contrib.learn.estimators.tensor_signature.TensorSignature(tf.constant(0, shape=[1,1]))
-
   print("Context: {}".format(INPUT_CONTEXT))
   for r in POTENTIAL_RESPONSES:
     prob = estimator.predict(input_fn=lambda: get_features(INPUT_CONTEXT, r))
-    print("{}: {:g}".format(r, prob[0,0]))
+    print("{}: {}".format(r, next(prob)[0]))  # builtin next(): .next() is Python 2 only
diff --git a/udc_test.py b/udc_test.py
@@ -12,7 +12,7 @@
 tf.flags.DEFINE_string("test_file", "./data/test.tfrecords", "Path of test data in TFRecords format")
 tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from")
 tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level")
-tf.flags.DEFINE_integer("test_batch_size", 16, "Batch size for testing")
+tf.flags.DEFINE_integer("test_batch_size", 8, "Batch size for testing")
 FLAGS = tf.flags.FLAGS
 
 if not FLAGS.model_dir:

diff --git a/udc_train.py b/udc_train.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+
 import os
 import time
 import itertools