Skip to content

Commit

Permalink
utf8 fix for reading bpe
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains authored Jan 19, 2021
1 parent 816a04b commit 34fdf33
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion deep_daze/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class SimpleTokenizer(object):
def __init__(self, bpe_path: str = default_bpe()):
self.byte_encoder = bytes_to_unicode()
self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
- merges = Path(bpe_path).read_text().split('\n')
+ merges = Path(bpe_path).read_text(encoding='utf8').split('\n')
merges = merges[1:49152-256-2+1]
merges = [tuple(merge.split()) for merge in merges]
vocab = list(bytes_to_unicode().values())
Expand Down

0 comments on commit 34fdf33

Please sign in to comment.