From 635e76df781c8eb1b777719089befa20e53b5c48 Mon Sep 17 00:00:00 2001 From: Kristian Aune Date: Fri, 28 Jun 2024 14:47:19 -0700 Subject: [PATCH] Pass trust_remote_code=True to load_dataset in convert.py --- colbert-long/scripts/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/colbert-long/scripts/convert.py b/colbert-long/scripts/convert.py index 0483a0b33..c5f2535c2 100644 --- a/colbert-long/scripts/convert.py +++ b/colbert-long/scripts/convert.py @@ -14,7 +14,7 @@ def remove_control_characters(s): is_separator_regex = False, ) -documents = load_dataset('Shitao/MLDR', "corpus-en", split='corpus') +documents = load_dataset('Shitao/MLDR', "corpus-en", split='corpus', trust_remote_code=True) feed_file = "/tmp/vespa_feed_file_en.json" with open(feed_file, "w") as f: for doc in documents: