diff --git a/colbert-long/scripts/convert.py b/colbert-long/scripts/convert.py
index 0483a0b33..c5f2535c2 100644
--- a/colbert-long/scripts/convert.py
+++ b/colbert-long/scripts/convert.py
@@ -14,7 +14,7 @@ def remove_control_characters(s):
     is_separator_regex = False,
 )
 
-documents = load_dataset('Shitao/MLDR', "corpus-en", split='corpus')
+documents = load_dataset('Shitao/MLDR', "corpus-en", split='corpus', trust_remote_code=True)
 feed_file = "/tmp/vespa_feed_file_en.json"
 with open(feed_file, "w") as f:
     for doc in documents: