Fix type error when tokenizing an empty tensor (#659)
guillaumekln committed May 6, 2020
1 parent 6c7fc9b commit 866259f
Showing 2 changed files with 10 additions and 1 deletion.
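
For context: when the tokenizer receives an empty string, the wrapped Python tokenizer returns an empty token list, and tf.constant([]) without an explicit dtype infers float32, which clashes with the tf.string output declared to tf.py_function. Below is a minimal sketch of that pattern and of the fix; the helper name _tokenize_empty and the whitespace split are hypothetical stand-ins, not OpenNMT-tf's actual tokenizer.

import tensorflow as tf

def _tokenize_empty(text):
    # Illustrative stand-in for a py_function-based tokenizer that declares
    # a tf.string output.
    def _python_wrapper(string_t):
        tokens = tf.compat.as_text(string_t.numpy()).split()
        # For an empty input, `tokens` is []. Without an explicit dtype,
        # tf.constant([]) infers float32 and py_function raises a type
        # error; forcing dtype=tf.string keeps the empty case consistent
        # with the declared output type.
        return tf.constant(tokens, dtype=tf.string)
    tokens = tf.py_function(_python_wrapper, [text], tf.string)
    tokens.set_shape([None])
    return tokens

print(_tokenize_empty(tf.constant("")))  # tf.Tensor([], shape=(0,), dtype=string)

Forcing dtype=tf.string on the returned constant is exactly what the one-line change in opennmt/tokenizers/tokenizer.py below does.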
9 changes: 9 additions & 0 deletions opennmt/tests/tokenizer_test.py
@@ -103,6 +103,15 @@ def testOpenNMTTokenizer(self):
[["Hello", "world", "■!"], ["Test"], ["My", "name"]],
["Hello world!", "Test", "My name"])

def testOpenNMTTokenizerEmptyTensor(self):
tokenizer = tokenizers.OpenNMTTokenizer()
tokens = tokenizer.tokenize(tf.constant(""))
self.assertIs(tokens.dtype, tf.string)
self.assertListEqual(tokens.shape.as_list(), [0])
text = tokenizer.detokenize(tokens)
self.assertIs(text.dtype, tf.string)
self.assertListEqual(text.shape.as_list(), [])

def testOpenNMTTokenizerArguments(self):
tokenizer = tokenizers.OpenNMTTokenizer(
mode="aggressive", spacer_annotate=True, spacer_new=True)
2 changes: 1 addition & 1 deletion opennmt/tokenizers/tokenizer.py
@@ -159,7 +159,7 @@ def _tokenize_tensor(self, text):
     def _python_wrapper(string_t):
       string = tf.compat.as_text(string_t.numpy())
       tokens = self._tokenize_string(string)
-      return tf.constant(tokens)
+      return tf.constant(tokens, dtype=tf.string)
     tokens = tf.py_function(_python_wrapper, [text], tf.string)
     tokens.set_shape([None])
     return tokens
