From 34fdf3348c28ec046987242bb94974d3a199f8f3 Mon Sep 17 00:00:00 2001 From: Phil Wang Date: Tue, 19 Jan 2021 01:16:56 -0800 Subject: [PATCH] utf8 fix for reading bpe --- deep_daze/clip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deep_daze/clip.py b/deep_daze/clip.py index 2fee914..1f52ade 100644 --- a/deep_daze/clip.py +++ b/deep_daze/clip.py @@ -61,7 +61,7 @@ class SimpleTokenizer(object): def __init__(self, bpe_path: str = default_bpe()): self.byte_encoder = bytes_to_unicode() self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} - merges = Path(bpe_path).read_text().split('\n') + merges = Path(bpe_path).read_text(encoding='utf8').split('\n') merges = merges[1:49152-256-2+1] merges = [tuple(merge.split()) for merge in merges] vocab = list(bytes_to_unicode().values())