diff --git a/models/dual_encoder.py b/models/dual_encoder.py
index e0e94a6..cb35f0e 100644
--- a/models/dual_encoder.py
+++ b/models/dual_encoder.py
@@ -51,10 +51,10 @@ def dual_encoder_model(
     # Run the utterance and context through the RNN
     rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
         cell,
-        tf.concat(0, [context_embedded, utterance_embedded]),
-        sequence_length=tf.concat(0, [context_len, utterance_len]),
+        tf.concat([context_embedded, utterance_embedded], 0),
+        sequence_length=tf.concat([context_len, utterance_len], 0),
         dtype=tf.float32)
-    encoding_context, encoding_utterance = tf.split(0, 2, rnn_states.h)
+    encoding_context, encoding_utterance = tf.split(rnn_states.h, 2, 0)
 
   with tf.variable_scope("prediction") as vs:
     M = tf.get_variable("M",
@@ -68,7 +68,7 @@ def dual_encoder_model(
 
     # Dot product between generated response and actual response
     # (c * M) * r
-    logits = tf.batch_matmul(generated_response, encoding_utterance, True)
+    logits = tf.matmul(generated_response, encoding_utterance, True)
     logits = tf.squeeze(logits, [2])
 
     # Apply sigmoid to convert logits to probabilities
@@ -78,7 +78,7 @@
       return probs, None
 
     # Calculate the binary cross-entropy loss
-    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits, tf.to_float(targets))
+    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=tf.to_float(targets))
 
   # Mean loss across the batch of examples
   mean_loss = tf.reduce_mean(losses, name="mean_loss")
diff --git a/udc_hparams.py b/udc_hparams.py
index c5747ae..b7ebb0d 100644
--- a/udc_hparams.py
+++ b/udc_hparams.py
@@ -19,8 +19,8 @@
 
 # Training Parameters
 tf.flags.DEFINE_float("learning_rate", 0.001, "Learning rate")
-tf.flags.DEFINE_integer("batch_size", 128, "Batch size during training")
-tf.flags.DEFINE_integer("eval_batch_size", 16, "Batch size during evaluation")
+tf.flags.DEFINE_integer("batch_size", 64, "Batch size during training")
+tf.flags.DEFINE_integer("eval_batch_size", 8, "Batch size during evaluation")
 tf.flags.DEFINE_string("optimizer", "Adam", "Optimizer Name (Adam, Adagrad, etc)")
 
 FLAGS = tf.flags.FLAGS
diff --git a/udc_model.py b/udc_model.py
index 1ad0739..7ef3d39 100644
--- a/udc_model.py
+++ b/udc_model.py
@@ -76,20 +76,20 @@ def model_fn(features, targets, mode):
     probs, loss = model_impl(
         hparams,
         mode,
-        tf.concat(0, all_contexts),
-        tf.concat(0, all_context_lens),
-        tf.concat(0, all_utterances),
-        tf.concat(0, all_utterance_lens),
-        tf.concat(0, all_targets))
+        tf.concat(all_contexts, 0),
+        tf.concat(all_context_lens, 0),
+        tf.concat(all_utterances, 0),
+        tf.concat(all_utterance_lens, 0),
+        tf.concat(all_targets, 0))
 
-    split_probs = tf.split(0, 10, probs)
-    shaped_probs = tf.concat(1, split_probs)
+    split_probs = tf.split(probs, 10, 0)
+    shaped_probs = tf.concat(split_probs, 1)
 
     # Add summaries
-    tf.histogram_summary("eval_correct_probs_hist", split_probs[0])
-    tf.scalar_summary("eval_correct_probs_average", tf.reduce_mean(split_probs[0]))
-    tf.histogram_summary("eval_incorrect_probs_hist", split_probs[1])
-    tf.scalar_summary("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1]))
+    tf.summary.histogram("eval_correct_probs_hist", split_probs[0])
+    tf.summary.scalar("eval_correct_probs_average", tf.reduce_mean(split_probs[0]))
+    tf.summary.histogram("eval_incorrect_probs_hist", split_probs[1])
+    tf.summary.scalar("eval_incorrect_probs_average", tf.reduce_mean(split_probs[1]))
 
     return shaped_probs, loss, None
 
diff --git a/udc_test.py b/udc_test.py
index a23cd50..1844b97 100644
--- a/udc_test.py
+++ b/udc_test.py
@@ -12,7 +12,7 @@
 tf.flags.DEFINE_string("test_file", "./data/test.tfrecords", "Path of test data in TFRecords format")
 tf.flags.DEFINE_string("model_dir", None, "Directory to load model checkpoints from")
 tf.flags.DEFINE_integer("loglevel", 20, "Tensorflow log level")
-tf.flags.DEFINE_integer("test_batch_size", 16, "Batch size for testing")
+tf.flags.DEFINE_integer("test_batch_size", 8, "Batch size for testing")
 FLAGS = tf.flags.FLAGS
 
 if not FLAGS.model_dir:
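Note: the call-site changes above follow the TensorFlow 1.0 API migration (tf.concat and tf.split argument reordering, tf.batch_matmul folded into tf.matmul, keyword-only arguments for sigmoid_cross_entropy_with_logits, and summary ops moved under tf.summary). Below is a minimal sketch of the old vs. new signatures, assuming a TensorFlow 1.x runtime; the tensors are illustrative placeholders, not values from this repo.

# Sketch of the TF 1.x call signatures used in the diff above (illustrative only).
import tensorflow as tf

a = tf.constant([[1.0, 2.0]])
b = tf.constant([[3.0, 4.0]])

# tf.concat: the list of tensors now comes first, the axis last.
merged = tf.concat([a, b], 0)            # previously tf.concat(0, [a, b])

# tf.split: the tensor comes first, then num_or_size_splits, then the axis.
first, second = tf.split(merged, 2, 0)   # previously tf.split(0, 2, merged)

# tf.nn.sigmoid_cross_entropy_with_logits: logits and labels must be keywords.
loss = tf.nn.sigmoid_cross_entropy_with_logits(
    logits=tf.constant([0.5]), labels=tf.constant([1.0]))

# Summary ops moved from tf.scalar_summary / tf.histogram_summary to tf.summary.
tf.summary.scalar("example_loss", tf.reduce_mean(loss))

with tf.Session() as sess:
    print(sess.run([merged, first, second, loss]))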