diff --git a/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py b/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py index 923d611..244d97a 100755 --- a/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py +++ b/src/tasksource/.ipynb_checkpoints/tasks-checkpoint.py @@ -772,7 +772,7 @@ def _preprocess_chatgpt_detection(ex): dataset_name="ColumbiaNLP/FLUTE") strategy_qa = Classification('question',labels='answer', - dataset_name="metaeval/strategy-qa",splits=['train',None,None]) + dataset_name="tasksource/strategy-qa",splits=['train',None,None]) summarize_from_feedback = MultipleChoice(get.info.post, choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']], @@ -951,6 +951,7 @@ def _udep_post_process(ds): #for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias": # print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')") +""" mbib_cognitive_bias = Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias') mbib_fake_news = Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news') mbib_gender_bias = Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias') @@ -959,6 +960,7 @@ def _udep_post_process(ds): mbib_political_bias = Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias') mbib_racial_bias = Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias') mbib_text_level_bias = Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias') +""" robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR") @@ -1021,7 +1023,7 @@ def _icl_rand(x): icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)], labels=lambda x: str(x['symbols'][_icl_rand(x)]==x['targets']), dataset_name="tasksource/icl-symbol-tuning-instruct", - pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char + pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<500*4), # 500 tokens of 4 char ) space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI") @@ -1234,4 +1236,16 @@ def _nlgraph_binarize(x): lex_glue___ecthr_a = Classification(sentence1="text", labels="labels",dataset_name="coastalcph/lex_glue",config_name="ecthr_a") # too long lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long -ultrafeedback = MultipleChoice("question", choices=['response_j','reponse_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired") \ No newline at end of file +ultrafeedback = MultipleChoice("question", choices=['response_j','response_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired") + +essay_scoring = Classification("full_text",labels="score",dataset_name='tasksource/AES2-essay-scoring') + +argument_feedback = Classification("discourse_text",labels="discourse_effectiveness", dataset_name="tasksource/argument-feedback") + +eg = lambda x: Classification("full_text", labels=lambda y:int(y[x]), dataset_name="tasksource/english-grading") +grading__cohesion = eg('cohesion') +grading__syntax = eg('syntax') +grading__vocabulary = eg('vocabulary') +grading__phraseology = eg('phraseology') +grading__grammar = eg('grammar') +grading__conventions = eg('conventions') diff --git a/src/tasksource/tasks.py b/src/tasksource/tasks.py index 923d611..244d97a 100755 --- a/src/tasksource/tasks.py +++ b/src/tasksource/tasks.py @@ -772,7 +772,7 @@ def _preprocess_chatgpt_detection(ex): dataset_name="ColumbiaNLP/FLUTE") strategy_qa = Classification('question',labels='answer', - dataset_name="metaeval/strategy-qa",splits=['train',None,None]) + dataset_name="tasksource/strategy-qa",splits=['train',None,None]) summarize_from_feedback = MultipleChoice(get.info.post, choices_list=lambda x: [x['summaries'][0]['text'],x['summaries'][1]['text']], @@ -951,6 +951,7 @@ def _udep_post_process(ds): #for CFG in "cognitive-bias", "fake-news", "gender-bias", "hate-speech", "linguistic-bias", "political-bias", "racial-bias", "text-level-bias": # print(f"mbib__{CFG.replace('-','_')} = Classification('text',labels=name('label',['not {CFG}','{CFG}']), dataset_name='mediabiasgroup/mbib-base', config_name='{CFG}')") +""" mbib_cognitive_bias = Classification('text',labels=name('label',['not cognitive-bias','cognitive-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='cognitive-bias') mbib_fake_news = Classification('text',labels=name('label',['not fake-news','fake-news']), dataset_name='mediabiasgroup/mbib-base', config_name='fake-news') mbib_gender_bias = Classification('text',labels=name('label',['not gender-bias','gender-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='gender-bias') @@ -959,6 +960,7 @@ def _udep_post_process(ds): mbib_political_bias = Classification('text',labels=name('label',['not political-bias','political-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='political-bias') mbib_racial_bias = Classification('text',labels=name('label',['not racial-bias','racial-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='racial-bias') mbib_text_level_bias = Classification('text',labels=name('label',['not text-level-bias','text-level-bias']), dataset_name='mediabiasgroup/mbib-base', config_name='text-level-bias') +""" robustLR = Classification("context","statement","label", dataset_name="tasksource/robustLR") @@ -1021,7 +1023,7 @@ def _icl_rand(x): icl = Classification("inputs", lambda x: x['symbols'][_icl_rand(x)], labels=lambda x: str(x['symbols'][_icl_rand(x)]==x['targets']), dataset_name="tasksource/icl-symbol-tuning-instruct", - pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<200*4), # 200 tokens of 4 char + pre_process=lambda ds:ds.filter(lambda x:len(x['inputs'])<500*4), # 500 tokens of 4 char ) space_nli = Classification("premises","hypothesis","label",dataset_name="tasksource/SpaceNLI") @@ -1234,4 +1236,16 @@ def _nlgraph_binarize(x): lex_glue___ecthr_a = Classification(sentence1="text", labels="labels",dataset_name="coastalcph/lex_glue",config_name="ecthr_a") # too long lex_glue___ecthr_b = Classification(sentence1="text", labels="labels") # too long -ultrafeedback = MultipleChoice("question", choices=['response_j','reponse_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired") \ No newline at end of file +ultrafeedback = MultipleChoice("question", choices=['response_j','response_k'],labels=constant(0), dataset_name="pushpdeep/UltraFeedback-paired") + +essay_scoring = Classification("full_text",labels="score",dataset_name='tasksource/AES2-essay-scoring') + +argument_feedback = Classification("discourse_text",labels="discourse_effectiveness", dataset_name="tasksource/argument-feedback") + +eg = lambda x: Classification("full_text", labels=lambda y:int(y[x]), dataset_name="tasksource/english-grading") +grading__cohesion = eg('cohesion') +grading__syntax = eg('syntax') +grading__vocabulary = eg('vocabulary') +grading__phraseology = eg('phraseology') +grading__grammar = eg('grammar') +grading__conventions = eg('conventions') diff --git a/tasks.md b/tasks.md index c3064a2..23d38db 100644 --- a/tasks.md +++ b/tasks.md @@ -15,25 +15,25 @@ | 12 | anli/a1 | anli | | a1 | anli__a1 | Classification | | 13 | anli/a2 | anli | | a2 | anli__a2 | Classification | | 14 | anli/a3 | anli | | a3 | anli__a3 | Classification | -| 15 | babi_nli/compound-coreference | tasksource/babi_nli | compound-coreference | | babi_nli | Classification | -| 16 | babi_nli/three-supporting-facts | tasksource/babi_nli | three-supporting-facts | | babi_nli | Classification | -| 17 | babi_nli/basic-deduction | tasksource/babi_nli | basic-deduction | | babi_nli | Classification | -| 18 | babi_nli/basic-induction | tasksource/babi_nli | basic-induction | | babi_nli | Classification | -| 19 | babi_nli/single-supporting-fact | tasksource/babi_nli | single-supporting-fact | | babi_nli | Classification | -| 20 | babi_nli/yes-no-questions | tasksource/babi_nli | yes-no-questions | | babi_nli | Classification | -| 21 | babi_nli/two-supporting-facts | tasksource/babi_nli | two-supporting-facts | | babi_nli | Classification | -| 22 | babi_nli/time-reasoning | tasksource/babi_nli | time-reasoning | | babi_nli | Classification | -| 23 | babi_nli/three-arg-relations | tasksource/babi_nli | three-arg-relations | | babi_nli | Classification | -| 24 | babi_nli/positional-reasoning | tasksource/babi_nli | positional-reasoning | | babi_nli | Classification | -| 25 | babi_nli/simple-negation | tasksource/babi_nli | simple-negation | | babi_nli | Classification | -| 26 | babi_nli/counting | tasksource/babi_nli | counting | | babi_nli | Classification | -| 27 | babi_nli/two-arg-relations | tasksource/babi_nli | two-arg-relations | | babi_nli | Classification | -| 28 | babi_nli/lists-sets | tasksource/babi_nli | lists-sets | | babi_nli | Classification | -| 29 | babi_nli/path-finding | tasksource/babi_nli | path-finding | | babi_nli | Classification | +| 15 | babi_nli/basic-coreference | tasksource/babi_nli | basic-coreference | | babi_nli | Classification | +| 16 | babi_nli/single-supporting-fact | tasksource/babi_nli | single-supporting-fact | | babi_nli | Classification | +| 17 | babi_nli/yes-no-questions | tasksource/babi_nli | yes-no-questions | | babi_nli | Classification | +| 18 | babi_nli/compound-coreference | tasksource/babi_nli | compound-coreference | | babi_nli | Classification | +| 19 | babi_nli/size-reasoning | tasksource/babi_nli | size-reasoning | | babi_nli | Classification | +| 20 | babi_nli/lists-sets | tasksource/babi_nli | lists-sets | | babi_nli | Classification | +| 21 | babi_nli/positional-reasoning | tasksource/babi_nli | positional-reasoning | | babi_nli | Classification | +| 22 | babi_nli/indefinite-knowledge | tasksource/babi_nli | indefinite-knowledge | | babi_nli | Classification | +| 23 | babi_nli/path-finding | tasksource/babi_nli | path-finding | | babi_nli | Classification | +| 24 | babi_nli/two-supporting-facts | tasksource/babi_nli | two-supporting-facts | | babi_nli | Classification | +| 25 | babi_nli/basic-induction | tasksource/babi_nli | basic-induction | | babi_nli | Classification | +| 26 | babi_nli/three-arg-relations | tasksource/babi_nli | three-arg-relations | | babi_nli | Classification | +| 27 | babi_nli/time-reasoning | tasksource/babi_nli | time-reasoning | | babi_nli | Classification | +| 28 | babi_nli/two-arg-relations | tasksource/babi_nli | two-arg-relations | | babi_nli | Classification | +| 29 | babi_nli/three-supporting-facts | tasksource/babi_nli | three-supporting-facts | | babi_nli | Classification | | 30 | babi_nli/conjunction | tasksource/babi_nli | conjunction | | babi_nli | Classification | -| 31 | babi_nli/size-reasoning | tasksource/babi_nli | size-reasoning | | babi_nli | Classification | -| 32 | babi_nli/basic-coreference | tasksource/babi_nli | basic-coreference | | babi_nli | Classification | -| 33 | babi_nli/indefinite-knowledge | tasksource/babi_nli | indefinite-knowledge | | babi_nli | Classification | +| 31 | babi_nli/basic-deduction | tasksource/babi_nli | basic-deduction | | babi_nli | Classification | +| 32 | babi_nli/counting | tasksource/babi_nli | counting | | babi_nli | Classification | +| 33 | babi_nli/simple-negation | tasksource/babi_nli | simple-negation | | babi_nli | Classification | | 34 | sick/label | sick | | label | sick__label | Classification | | 35 | sick/relatedness | sick | | relatedness | sick__relatedness | Classification | | 36 | sick/entailment_AB | sick | | entailment_AB | sick__entailment_AB | Classification | @@ -41,17 +41,17 @@ | 38 | scitail/snli_format | scitail | snli_format | | scitail | Classification | | 39 | hans | hans | | | hans | Classification | | 40 | WANLI | alisawuffles/WANLI | | | wanli | Classification | -| 41 | recast/recast_verbnet | tasksource/recast | recast_verbnet | | recast_nli | Classification | +| 41 | recast/recast_puns | tasksource/recast | recast_puns | | recast_nli | Classification | | 42 | recast/recast_factuality | tasksource/recast | recast_factuality | | recast_nli | Classification | -| 43 | recast/recast_puns | tasksource/recast | recast_puns | | recast_nli | Classification | -| 44 | recast/recast_kg_relations | tasksource/recast | recast_kg_relations | | recast_nli | Classification | -| 45 | recast/recast_sentiment | tasksource/recast | recast_sentiment | | recast_nli | Classification | +| 43 | recast/recast_verbnet | tasksource/recast | recast_verbnet | | recast_nli | Classification | +| 44 | recast/recast_sentiment | tasksource/recast | recast_sentiment | | recast_nli | Classification | +| 45 | recast/recast_kg_relations | tasksource/recast | recast_kg_relations | | recast_nli | Classification | | 46 | recast/recast_verbcorner | tasksource/recast | recast_verbcorner | | recast_nli | Classification | | 47 | recast/recast_ner | tasksource/recast | recast_ner | | recast_nli | Classification | | 48 | recast/recast_megaveridicality | tasksource/recast | recast_megaveridicality | | recast_nli | Classification | -| 49 | probability_words_nli/usnli | sileod/probability_words_nli | usnli | | probability_words_nli | Classification | -| 50 | probability_words_nli/reasoning_2hop | sileod/probability_words_nli | reasoning_2hop | | probability_words_nli | Classification | -| 51 | probability_words_nli/reasoning_1hop | sileod/probability_words_nli | reasoning_1hop | | probability_words_nli | Classification | +| 49 | probability_words_nli/reasoning_2hop | sileod/probability_words_nli | reasoning_2hop | | probability_words_nli | Classification | +| 50 | probability_words_nli/reasoning_1hop | sileod/probability_words_nli | reasoning_1hop | | probability_words_nli | Classification | +| 51 | probability_words_nli/usnli | sileod/probability_words_nli | usnli | | probability_words_nli | Classification | | 52 | nan-nli/joey234--nan-nli | joey234/nan-nli | joey234--nan-nli | | nan_nli | Classification | | 53 | nli_fever | pietrolesci/nli_fever | | | nli_fever | Classification | | 54 | breaking_nli | pietrolesci/breaking_nli | | | breaking_nli | Classification | @@ -81,28 +81,28 @@ | 78 | gen_debiased_nli/mnli_z_aug | pietrolesci/gen_debiased_nli | | mnli_z_aug | gen_debiased_nli__mnli_z_aug | Classification | | 79 | gen_debiased_nli/mnli_seq_z | pietrolesci/gen_debiased_nli | | mnli_seq_z | gen_debiased_nli__mnli_seq_z | Classification | | 80 | add_one_rte | pietrolesci/add_one_rte | | | add_one_rte | Classification | -| 81 | imppres/presupposition_all_n_presupposition/presupposition | tasksource/imppres | presupposition_all_n_presupposition | presupposition | imppres__presupposition | Classification | -| 82 | imppres/presupposition_only_presupposition/presupposition | tasksource/imppres | presupposition_only_presupposition | presupposition | imppres__presupposition | Classification | -| 83 | imppres/presupposition_cleft_existence/presupposition | tasksource/imppres | presupposition_cleft_existence | presupposition | imppres__presupposition | Classification | -| 84 | imppres/presupposition_question_presupposition/presupposition | tasksource/imppres | presupposition_question_presupposition | presupposition | imppres__presupposition | Classification | -| 85 | imppres/presupposition_cleft_uniqueness/presupposition | tasksource/imppres | presupposition_cleft_uniqueness | presupposition | imppres__presupposition | Classification | -| 86 | imppres/presupposition_possessed_definites_uniqueness/presupposition | tasksource/imppres | presupposition_possessed_definites_uniqueness | presupposition | imppres__presupposition | Classification | +| 81 | imppres/presupposition_cleft_uniqueness/presupposition | tasksource/imppres | presupposition_cleft_uniqueness | presupposition | imppres__presupposition | Classification | +| 82 | imppres/presupposition_cleft_existence/presupposition | tasksource/imppres | presupposition_cleft_existence | presupposition | imppres__presupposition | Classification | +| 83 | imppres/presupposition_possessed_definites_uniqueness/presupposition | tasksource/imppres | presupposition_possessed_definites_uniqueness | presupposition | imppres__presupposition | Classification | +| 84 | imppres/presupposition_possessed_definites_existence/presupposition | tasksource/imppres | presupposition_possessed_definites_existence | presupposition | imppres__presupposition | Classification | +| 85 | imppres/presupposition_only_presupposition/presupposition | tasksource/imppres | presupposition_only_presupposition | presupposition | imppres__presupposition | Classification | +| 86 | imppres/presupposition_all_n_presupposition/presupposition | tasksource/imppres | presupposition_all_n_presupposition | presupposition | imppres__presupposition | Classification | | 87 | imppres/presupposition_both_presupposition/presupposition | tasksource/imppres | presupposition_both_presupposition | presupposition | imppres__presupposition | Classification | -| 88 | imppres/presupposition_possessed_definites_existence/presupposition | tasksource/imppres | presupposition_possessed_definites_existence | presupposition | imppres__presupposition | Classification | -| 89 | imppres/presupposition_change_of_state/presupposition | tasksource/imppres | presupposition_change_of_state | presupposition | imppres__presupposition | Classification | -| 90 | imppres/implicature_gradable_adjective/prag | tasksource/imppres | implicature_gradable_adjective | prag | imppres__prag | Classification | -| 91 | imppres/implicature_gradable_verb/prag | tasksource/imppres | implicature_gradable_verb | prag | imppres__prag | Classification | -| 92 | imppres/implicature_connectives/prag | tasksource/imppres | implicature_connectives | prag | imppres__prag | Classification | -| 93 | imppres/implicature_modals/prag | tasksource/imppres | implicature_modals | prag | imppres__prag | Classification | +| 88 | imppres/presupposition_change_of_state/presupposition | tasksource/imppres | presupposition_change_of_state | presupposition | imppres__presupposition | Classification | +| 89 | imppres/presupposition_question_presupposition/presupposition | tasksource/imppres | presupposition_question_presupposition | presupposition | imppres__presupposition | Classification | +| 90 | imppres/implicature_connectives/prag | tasksource/imppres | implicature_connectives | prag | imppres__prag | Classification | +| 91 | imppres/implicature_modals/prag | tasksource/imppres | implicature_modals | prag | imppres__prag | Classification | +| 92 | imppres/implicature_numerals_2_3/prag | tasksource/imppres | implicature_numerals_2_3 | prag | imppres__prag | Classification | +| 93 | imppres/implicature_quantifiers/prag | tasksource/imppres | implicature_quantifiers | prag | imppres__prag | Classification | | 94 | imppres/implicature_numerals_10_100/prag | tasksource/imppres | implicature_numerals_10_100 | prag | imppres__prag | Classification | -| 95 | imppres/implicature_numerals_2_3/prag | tasksource/imppres | implicature_numerals_2_3 | prag | imppres__prag | Classification | -| 96 | imppres/implicature_quantifiers/prag | tasksource/imppres | implicature_quantifiers | prag | imppres__prag | Classification | -| 97 | imppres/implicature_modals/log | tasksource/imppres | implicature_modals | log | imppres__log | Classification | -| 98 | imppres/implicature_numerals_10_100/log | tasksource/imppres | implicature_numerals_10_100 | log | imppres__log | Classification | +| 95 | imppres/implicature_gradable_adjective/prag | tasksource/imppres | implicature_gradable_adjective | prag | imppres__prag | Classification | +| 96 | imppres/implicature_gradable_verb/prag | tasksource/imppres | implicature_gradable_verb | prag | imppres__prag | Classification | +| 97 | imppres/implicature_gradable_adjective/log | tasksource/imppres | implicature_gradable_adjective | log | imppres__log | Classification | +| 98 | imppres/implicature_modals/log | tasksource/imppres | implicature_modals | log | imppres__log | Classification | | 99 | imppres/implicature_connectives/log | tasksource/imppres | implicature_connectives | log | imppres__log | Classification | -| 100 | imppres/implicature_gradable_adjective/log | tasksource/imppres | implicature_gradable_adjective | log | imppres__log | Classification | -| 101 | imppres/implicature_gradable_verb/log | tasksource/imppres | implicature_gradable_verb | log | imppres__log | Classification | -| 102 | imppres/implicature_numerals_2_3/log | tasksource/imppres | implicature_numerals_2_3 | log | imppres__log | Classification | +| 100 | imppres/implicature_numerals_2_3/log | tasksource/imppres | implicature_numerals_2_3 | log | imppres__log | Classification | +| 101 | imppres/implicature_numerals_10_100/log | tasksource/imppres | implicature_numerals_10_100 | log | imppres__log | Classification | +| 102 | imppres/implicature_gradable_verb/log | tasksource/imppres | implicature_gradable_verb | log | imppres__log | Classification | | 103 | imppres/implicature_quantifiers/log | tasksource/imppres | implicature_quantifiers | log | imppres__log | Classification | | 104 | hlgd | hlgd | | | hlgd | Classification | | 105 | paws/labeled_final | paws | labeled_final | | paws___labeled_final | Classification | @@ -114,128 +114,128 @@ | 111 | model-written-evals | Anthropic/model-written-evals | | | model_written_evals | MultipleChoice | | 112 | truthful_qa/multiple_choice | truthful_qa | multiple_choice | | truthful_qa___multiple_choice | MultipleChoice | | 113 | fig-qa | nightingal3/fig-qa | | | fig_qa | MultipleChoice | -| 114 | bigbench/dark_humor_detection | tasksource/bigbench | dark_humor_detection | | bigbench | MultipleChoice | -| 115 | bigbench/cause_and_effect | tasksource/bigbench | cause_and_effect | | bigbench | MultipleChoice | -| 116 | bigbench/phrase_relatedness | tasksource/bigbench | phrase_relatedness | | bigbench | MultipleChoice | -| 117 | bigbench/navigate | tasksource/bigbench | navigate | | bigbench | MultipleChoice | -| 118 | bigbench/date_understanding | tasksource/bigbench | date_understanding | | bigbench | MultipleChoice | -| 119 | bigbench/reasoning_about_colored_objects | tasksource/bigbench | reasoning_about_colored_objects | | bigbench | MultipleChoice | -| 120 | bigbench/understanding_fables | tasksource/bigbench | understanding_fables | | bigbench | MultipleChoice | -| 121 | bigbench/unit_interpretation | tasksource/bigbench | unit_interpretation | | bigbench | MultipleChoice | -| 122 | bigbench/gre_reading_comprehension | tasksource/bigbench | gre_reading_comprehension | | bigbench | MultipleChoice | -| 123 | bigbench/abstract_narrative_understanding | tasksource/bigbench | abstract_narrative_understanding | | bigbench | MultipleChoice | -| 124 | bigbench/cifar10_classification | tasksource/bigbench | cifar10_classification | | bigbench | MultipleChoice | -| 125 | bigbench/fact_checker | tasksource/bigbench | fact_checker | | bigbench | MultipleChoice | -| 126 | bigbench/snarks | tasksource/bigbench | snarks | | bigbench | MultipleChoice | -| 127 | bigbench/vitaminc_fact_verification | tasksource/bigbench | vitaminc_fact_verification | | bigbench | MultipleChoice | -| 128 | bigbench/empirical_judgments | tasksource/bigbench | empirical_judgments | | bigbench | MultipleChoice | -| 129 | bigbench/irony_identification | tasksource/bigbench | irony_identification | | bigbench | MultipleChoice | -| 130 | bigbench/identify_math_theorems | tasksource/bigbench | identify_math_theorems | | bigbench | MultipleChoice | -| 131 | bigbench/winowhy | tasksource/bigbench | winowhy | | bigbench | MultipleChoice | +| 114 | bigbench/misconceptions | tasksource/bigbench | misconceptions | | bigbench | MultipleChoice | +| 115 | bigbench/social_support | tasksource/bigbench | social_support | | bigbench | MultipleChoice | +| 116 | bigbench/dark_humor_detection | tasksource/bigbench | dark_humor_detection | | bigbench | MultipleChoice | +| 117 | bigbench/general_knowledge | tasksource/bigbench | general_knowledge | | bigbench | MultipleChoice | +| 118 | bigbench/code_line_description | tasksource/bigbench | code_line_description | | bigbench | MultipleChoice | +| 119 | bigbench/unit_interpretation | tasksource/bigbench | unit_interpretation | | bigbench | MultipleChoice | +| 120 | bigbench/date_understanding | tasksource/bigbench | date_understanding | | bigbench | MultipleChoice | +| 121 | bigbench/tracking_shuffled_objects | tasksource/bigbench | tracking_shuffled_objects | | bigbench | MultipleChoice | +| 122 | bigbench/formal_fallacies_syllogisms_negation | tasksource/bigbench | formal_fallacies_syllogisms_negation | | bigbench | MultipleChoice | +| 123 | bigbench/nonsense_words_grammar | tasksource/bigbench | nonsense_words_grammar | | bigbench | MultipleChoice | +| 124 | bigbench/anachronisms | tasksource/bigbench | anachronisms | | bigbench | MultipleChoice | +| 125 | bigbench/discourse_marker_prediction | tasksource/bigbench | discourse_marker_prediction | | bigbench | MultipleChoice | +| 126 | bigbench/metaphor_understanding | tasksource/bigbench | metaphor_understanding | | bigbench | MultipleChoice | +| 127 | bigbench/logical_args | tasksource/bigbench | logical_args | | bigbench | MultipleChoice | +| 128 | bigbench/mnist_ascii | tasksource/bigbench | mnist_ascii | | bigbench | MultipleChoice | +| 129 | bigbench/sentence_ambiguity | tasksource/bigbench | sentence_ambiguity | | bigbench | MultipleChoice | +| 130 | bigbench/presuppositions_as_nli | tasksource/bigbench | presuppositions_as_nli | | bigbench | MultipleChoice | +| 131 | bigbench/gre_reading_comprehension | tasksource/bigbench | gre_reading_comprehension | | bigbench | MultipleChoice | | 132 | bigbench/emojis_emotion_prediction | tasksource/bigbench | emojis_emotion_prediction | | bigbench | MultipleChoice | -| 133 | bigbench/question_selection | tasksource/bigbench | question_selection | | bigbench | MultipleChoice | -| 134 | bigbench/anachronisms | tasksource/bigbench | anachronisms | | bigbench | MultipleChoice | -| 135 | bigbench/goal_step_wikihow | tasksource/bigbench | goal_step_wikihow | | bigbench | MultipleChoice | -| 136 | bigbench/presuppositions_as_nli | tasksource/bigbench | presuppositions_as_nli | | bigbench | MultipleChoice | -| 137 | bigbench/ruin_names | tasksource/bigbench | ruin_names | | bigbench | MultipleChoice | -| 138 | bigbench/evaluating_information_essentiality | tasksource/bigbench | evaluating_information_essentiality | | bigbench | MultipleChoice | -| 139 | bigbench/geometric_shapes | tasksource/bigbench | geometric_shapes | | bigbench | MultipleChoice | -| 140 | bigbench/physics | tasksource/bigbench | physics | | bigbench | MultipleChoice | -| 141 | bigbench/hhh_alignment | tasksource/bigbench | hhh_alignment | | bigbench | MultipleChoice | -| 142 | bigbench/implicit_relations | tasksource/bigbench | implicit_relations | | bigbench | MultipleChoice | -| 143 | bigbench/elementary_math_qa | tasksource/bigbench | elementary_math_qa | | bigbench | MultipleChoice | -| 144 | bigbench/symbol_interpretation | tasksource/bigbench | symbol_interpretation | | bigbench | MultipleChoice | -| 145 | bigbench/misconceptions | tasksource/bigbench | misconceptions | | bigbench | MultipleChoice | -| 146 | bigbench/odd_one_out | tasksource/bigbench | odd_one_out | | bigbench | MultipleChoice | -| 147 | bigbench/implicatures | tasksource/bigbench | implicatures | | bigbench | MultipleChoice | -| 148 | bigbench/temporal_sequences | tasksource/bigbench | temporal_sequences | | bigbench | MultipleChoice | -| 149 | bigbench/human_organs_senses | tasksource/bigbench | human_organs_senses | | bigbench | MultipleChoice | -| 150 | bigbench/social_iqa | tasksource/bigbench | social_iqa | | bigbench | MultipleChoice | -| 151 | bigbench/salient_translation_error_detection | tasksource/bigbench | salient_translation_error_detection | | bigbench | MultipleChoice | -| 152 | bigbench/hindu_knowledge | tasksource/bigbench | hindu_knowledge | | bigbench | MultipleChoice | -| 153 | bigbench/figure_of_speech_detection | tasksource/bigbench | figure_of_speech_detection | | bigbench | MultipleChoice | -| 154 | bigbench/penguins_in_a_table | tasksource/bigbench | penguins_in_a_table | | bigbench | MultipleChoice | -| 155 | bigbench/similarities_abstraction | tasksource/bigbench | similarities_abstraction | | bigbench | MultipleChoice | -| 156 | bigbench/movie_recommendation | tasksource/bigbench | movie_recommendation | | bigbench | MultipleChoice | -| 157 | bigbench/code_line_description | tasksource/bigbench | code_line_description | | bigbench | MultipleChoice | -| 158 | bigbench/logic_grid_puzzle | tasksource/bigbench | logic_grid_puzzle | | bigbench | MultipleChoice | -| 159 | bigbench/conceptual_combinations | tasksource/bigbench | conceptual_combinations | | bigbench | MultipleChoice | -| 160 | bigbench/logical_sequence | tasksource/bigbench | logical_sequence | | bigbench | MultipleChoice | -| 161 | bigbench/moral_permissibility | tasksource/bigbench | moral_permissibility | | bigbench | MultipleChoice | -| 162 | bigbench/intent_recognition | tasksource/bigbench | intent_recognition | | bigbench | MultipleChoice | -| 163 | bigbench/arithmetic | tasksource/bigbench | arithmetic | | bigbench | MultipleChoice | -| 164 | bigbench/english_proverbs | tasksource/bigbench | english_proverbs | | bigbench | MultipleChoice | -| 165 | bigbench/general_knowledge | tasksource/bigbench | general_knowledge | | bigbench | MultipleChoice | -| 166 | bigbench/analogical_similarity | tasksource/bigbench | analogical_similarity | | bigbench | MultipleChoice | -| 167 | bigbench/checkmate_in_one | tasksource/bigbench | checkmate_in_one | | bigbench | MultipleChoice | -| 168 | bigbench/physical_intuition | tasksource/bigbench | physical_intuition | | bigbench | MultipleChoice | -| 169 | bigbench/known_unknowns | tasksource/bigbench | known_unknowns | | bigbench | MultipleChoice | -| 170 | bigbench/crash_blossom | tasksource/bigbench | crash_blossom | | bigbench | MultipleChoice | -| 171 | bigbench/color | tasksource/bigbench | color | | bigbench | MultipleChoice | -| 172 | bigbench/suicide_risk | tasksource/bigbench | suicide_risk | | bigbench | MultipleChoice | -| 173 | bigbench/novel_concepts | tasksource/bigbench | novel_concepts | | bigbench | MultipleChoice | -| 174 | bigbench/fantasy_reasoning | tasksource/bigbench | fantasy_reasoning | | bigbench | MultipleChoice | -| 175 | bigbench/contextual_parametric_knowledge_conflicts | tasksource/bigbench | contextual_parametric_knowledge_conflicts | | bigbench | MultipleChoice | -| 176 | bigbench/cs_algorithms | tasksource/bigbench | cs_algorithms | | bigbench | MultipleChoice | -| 177 | bigbench/international_phonetic_alphabet_nli | tasksource/bigbench | international_phonetic_alphabet_nli | | bigbench | MultipleChoice | -| 178 | bigbench/identify_odd_metaphor | tasksource/bigbench | identify_odd_metaphor | | bigbench | MultipleChoice | -| 179 | bigbench/emoji_movie | tasksource/bigbench | emoji_movie | | bigbench | MultipleChoice | -| 180 | bigbench/mnist_ascii | tasksource/bigbench | mnist_ascii | | bigbench | MultipleChoice | -| 181 | bigbench/epistemic_reasoning | tasksource/bigbench | epistemic_reasoning | | bigbench | MultipleChoice | -| 182 | bigbench/discourse_marker_prediction | tasksource/bigbench | discourse_marker_prediction | | bigbench | MultipleChoice | -| 183 | bigbench/play_dialog_same_or_different | tasksource/bigbench | play_dialog_same_or_different | | bigbench | MultipleChoice | -| 184 | bigbench/tracking_shuffled_objects | tasksource/bigbench | tracking_shuffled_objects | | bigbench | MultipleChoice | -| 185 | bigbench/metaphor_understanding | tasksource/bigbench | metaphor_understanding | | bigbench | MultipleChoice | -| 186 | bigbench/timedial | tasksource/bigbench | timedial | | bigbench | MultipleChoice | -| 187 | bigbench/formal_fallacies_syllogisms_negation | tasksource/bigbench | formal_fallacies_syllogisms_negation | | bigbench | MultipleChoice | -| 188 | bigbench/simple_ethical_questions | tasksource/bigbench | simple_ethical_questions | | bigbench | MultipleChoice | -| 189 | bigbench/causal_judgment | tasksource/bigbench | causal_judgment | | bigbench | MultipleChoice | -| 190 | bigbench/mathematical_induction | tasksource/bigbench | mathematical_induction | | bigbench | MultipleChoice | -| 191 | bigbench/nonsense_words_grammar | tasksource/bigbench | nonsense_words_grammar | | bigbench | MultipleChoice | -| 192 | bigbench/metaphor_boolean | tasksource/bigbench | metaphor_boolean | | bigbench | MultipleChoice | -| 193 | bigbench/crass_ai | tasksource/bigbench | crass_ai | | bigbench | MultipleChoice | -| 194 | bigbench/disambiguation_qa | tasksource/bigbench | disambiguation_qa | | bigbench | MultipleChoice | -| 195 | bigbench/analytic_entailment | tasksource/bigbench | analytic_entailment | | bigbench | MultipleChoice | +| 133 | bigbench/play_dialog_same_or_different | tasksource/bigbench | play_dialog_same_or_different | | bigbench | MultipleChoice | +| 134 | bigbench/geometric_shapes | tasksource/bigbench | geometric_shapes | | bigbench | MultipleChoice | +| 135 | bigbench/authorship_verification | tasksource/bigbench | authorship_verification | | bigbench | MultipleChoice | +| 136 | bigbench/logical_deduction | tasksource/bigbench | logical_deduction | | bigbench | MultipleChoice | +| 137 | bigbench/logical_sequence | tasksource/bigbench | logical_sequence | | bigbench | MultipleChoice | +| 138 | bigbench/strategyqa | tasksource/bigbench | strategyqa | | bigbench | MultipleChoice | +| 139 | bigbench/fantasy_reasoning | tasksource/bigbench | fantasy_reasoning | | bigbench | MultipleChoice | +| 140 | bigbench/timedial | tasksource/bigbench | timedial | | bigbench | MultipleChoice | +| 141 | bigbench/emoji_movie | tasksource/bigbench | emoji_movie | | bigbench | MultipleChoice | +| 142 | bigbench/entailed_polarity | tasksource/bigbench | entailed_polarity | | bigbench | MultipleChoice | +| 143 | bigbench/fact_checker | tasksource/bigbench | fact_checker | | bigbench | MultipleChoice | +| 144 | bigbench/movie_dialog_same_or_different | tasksource/bigbench | movie_dialog_same_or_different | | bigbench | MultipleChoice | +| 145 | bigbench/moral_permissibility | tasksource/bigbench | moral_permissibility | | bigbench | MultipleChoice | +| 146 | bigbench/human_organs_senses | tasksource/bigbench | human_organs_senses | | bigbench | MultipleChoice | +| 147 | bigbench/analogical_similarity | tasksource/bigbench | analogical_similarity | | bigbench | MultipleChoice | +| 148 | bigbench/social_iqa | tasksource/bigbench | social_iqa | | bigbench | MultipleChoice | +| 149 | bigbench/analytic_entailment | tasksource/bigbench | analytic_entailment | | bigbench | MultipleChoice | +| 150 | bigbench/penguins_in_a_table | tasksource/bigbench | penguins_in_a_table | | bigbench | MultipleChoice | +| 151 | bigbench/intent_recognition | tasksource/bigbench | intent_recognition | | bigbench | MultipleChoice | +| 152 | bigbench/ruin_names | tasksource/bigbench | ruin_names | | bigbench | MultipleChoice | +| 153 | bigbench/checkmate_in_one | tasksource/bigbench | checkmate_in_one | | bigbench | MultipleChoice | +| 154 | bigbench/metaphor_boolean | tasksource/bigbench | metaphor_boolean | | bigbench | MultipleChoice | +| 155 | bigbench/cs_algorithms | tasksource/bigbench | cs_algorithms | | bigbench | MultipleChoice | +| 156 | bigbench/cause_and_effect | tasksource/bigbench | cause_and_effect | | bigbench | MultipleChoice | +| 157 | bigbench/disambiguation_qa | tasksource/bigbench | disambiguation_qa | | bigbench | MultipleChoice | +| 158 | bigbench/undo_permutation | tasksource/bigbench | undo_permutation | | bigbench | MultipleChoice | +| 159 | bigbench/odd_one_out | tasksource/bigbench | odd_one_out | | bigbench | MultipleChoice | +| 160 | bigbench/international_phonetic_alphabet_nli | tasksource/bigbench | international_phonetic_alphabet_nli | | bigbench | MultipleChoice | +| 161 | bigbench/goal_step_wikihow | tasksource/bigbench | goal_step_wikihow | | bigbench | MultipleChoice | +| 162 | bigbench/implicatures | tasksource/bigbench | implicatures | | bigbench | MultipleChoice | +| 163 | bigbench/physical_intuition | tasksource/bigbench | physical_intuition | | bigbench | MultipleChoice | +| 164 | bigbench/real_or_fake_text | tasksource/bigbench | real_or_fake_text | | bigbench | MultipleChoice | +| 165 | bigbench/reasoning_about_colored_objects | tasksource/bigbench | reasoning_about_colored_objects | | bigbench | MultipleChoice | +| 166 | bigbench/temporal_sequences | tasksource/bigbench | temporal_sequences | | bigbench | MultipleChoice | +| 167 | bigbench/sports_understanding | tasksource/bigbench | sports_understanding | | bigbench | MultipleChoice | +| 168 | bigbench/mathematical_induction | tasksource/bigbench | mathematical_induction | | bigbench | MultipleChoice | +| 169 | bigbench/snarks | tasksource/bigbench | snarks | | bigbench | MultipleChoice | +| 170 | bigbench/identify_odd_metaphor | tasksource/bigbench | identify_odd_metaphor | | bigbench | MultipleChoice | +| 171 | bigbench/logical_fallacy_detection | tasksource/bigbench | logical_fallacy_detection | | bigbench | MultipleChoice | +| 172 | bigbench/understanding_fables | tasksource/bigbench | understanding_fables | | bigbench | MultipleChoice | +| 173 | bigbench/conceptual_combinations | tasksource/bigbench | conceptual_combinations | | bigbench | MultipleChoice | +| 174 | bigbench/key_value_maps | tasksource/bigbench | key_value_maps | | bigbench | MultipleChoice | +| 175 | bigbench/logic_grid_puzzle | tasksource/bigbench | logic_grid_puzzle | | bigbench | MultipleChoice | +| 176 | bigbench/salient_translation_error_detection | tasksource/bigbench | salient_translation_error_detection | | bigbench | MultipleChoice | +| 177 | bigbench/vitaminc_fact_verification | tasksource/bigbench | vitaminc_fact_verification | | bigbench | MultipleChoice | +| 178 | bigbench/epistemic_reasoning | tasksource/bigbench | epistemic_reasoning | | bigbench | MultipleChoice | +| 179 | bigbench/abstract_narrative_understanding | tasksource/bigbench | abstract_narrative_understanding | | bigbench | MultipleChoice | +| 180 | bigbench/simple_ethical_questions | tasksource/bigbench | simple_ethical_questions | | bigbench | MultipleChoice | +| 181 | bigbench/suicide_risk | tasksource/bigbench | suicide_risk | | bigbench | MultipleChoice | +| 182 | bigbench/question_selection | tasksource/bigbench | question_selection | | bigbench | MultipleChoice | +| 183 | bigbench/winowhy | tasksource/bigbench | winowhy | | bigbench | MultipleChoice | +| 184 | bigbench/crash_blossom | tasksource/bigbench | crash_blossom | | bigbench | MultipleChoice | +| 185 | bigbench/riddle_sense | tasksource/bigbench | riddle_sense | | bigbench | MultipleChoice | +| 186 | bigbench/color | tasksource/bigbench | color | | bigbench | MultipleChoice | +| 187 | bigbench/strange_stories | tasksource/bigbench | strange_stories | | bigbench | MultipleChoice | +| 188 | bigbench/arithmetic | tasksource/bigbench | arithmetic | | bigbench | MultipleChoice | +| 189 | bigbench/irony_identification | tasksource/bigbench | irony_identification | | bigbench | MultipleChoice | +| 190 | bigbench/implicit_relations | tasksource/bigbench | implicit_relations | | bigbench | MultipleChoice | +| 191 | bigbench/causal_judgment | tasksource/bigbench | causal_judgment | | bigbench | MultipleChoice | +| 192 | bigbench/figure_of_speech_detection | tasksource/bigbench | figure_of_speech_detection | | bigbench | MultipleChoice | +| 193 | bigbench/hindu_knowledge | tasksource/bigbench | hindu_knowledge | | bigbench | MultipleChoice | +| 194 | bigbench/evaluating_information_essentiality | tasksource/bigbench | evaluating_information_essentiality | | bigbench | MultipleChoice | +| 195 | bigbench/similarities_abstraction | tasksource/bigbench | similarities_abstraction | | bigbench | MultipleChoice | | 196 | bigbench/bbq_lite_json | tasksource/bigbench | bbq_lite_json | | bigbench | MultipleChoice | -| 197 | bigbench/entailed_polarity | tasksource/bigbench | entailed_polarity | | bigbench | MultipleChoice | -| 198 | bigbench/logical_deduction | tasksource/bigbench | logical_deduction | | bigbench | MultipleChoice | -| 199 | bigbench/authorship_verification | tasksource/bigbench | authorship_verification | | bigbench | MultipleChoice | -| 200 | bigbench/sports_understanding | tasksource/bigbench | sports_understanding | | bigbench | MultipleChoice | -| 201 | bigbench/movie_dialog_same_or_different | tasksource/bigbench | movie_dialog_same_or_different | | bigbench | MultipleChoice | -| 202 | bigbench/strategyqa | tasksource/bigbench | strategyqa | | bigbench | MultipleChoice | -| 203 | bigbench/undo_permutation | tasksource/bigbench | undo_permutation | | bigbench | MultipleChoice | -| 204 | bigbench/logical_fallacy_detection | tasksource/bigbench | logical_fallacy_detection | | bigbench | MultipleChoice | -| 205 | bigbench/logical_args | tasksource/bigbench | logical_args | | bigbench | MultipleChoice | -| 206 | bigbench/riddle_sense | tasksource/bigbench | riddle_sense | | bigbench | MultipleChoice | -| 207 | bigbench/sentence_ambiguity | tasksource/bigbench | sentence_ambiguity | | bigbench | MultipleChoice | -| 208 | bigbench/key_value_maps | tasksource/bigbench | key_value_maps | | bigbench | MultipleChoice | -| 209 | bigbench/strange_stories | tasksource/bigbench | strange_stories | | bigbench | MultipleChoice | -| 210 | bigbench/hyperbaton | tasksource/bigbench | hyperbaton | | bigbench | MultipleChoice | +| 197 | bigbench/novel_concepts | tasksource/bigbench | novel_concepts | | bigbench | MultipleChoice | +| 198 | bigbench/english_proverbs | tasksource/bigbench | english_proverbs | | bigbench | MultipleChoice | +| 199 | bigbench/known_unknowns | tasksource/bigbench | known_unknowns | | bigbench | MultipleChoice | +| 200 | bigbench/navigate | tasksource/bigbench | navigate | | bigbench | MultipleChoice | +| 201 | bigbench/phrase_relatedness | tasksource/bigbench | phrase_relatedness | | bigbench | MultipleChoice | +| 202 | bigbench/contextual_parametric_knowledge_conflicts | tasksource/bigbench | contextual_parametric_knowledge_conflicts | | bigbench | MultipleChoice | +| 203 | bigbench/symbol_interpretation | tasksource/bigbench | symbol_interpretation | | bigbench | MultipleChoice | +| 204 | bigbench/hhh_alignment | tasksource/bigbench | hhh_alignment | | bigbench | MultipleChoice | +| 205 | bigbench/hyperbaton | tasksource/bigbench | hyperbaton | | bigbench | MultipleChoice | +| 206 | bigbench/empirical_judgments | tasksource/bigbench | empirical_judgments | | bigbench | MultipleChoice | +| 207 | bigbench/identify_math_theorems | tasksource/bigbench | identify_math_theorems | | bigbench | MultipleChoice | +| 208 | bigbench/crass_ai | tasksource/bigbench | crass_ai | | bigbench | MultipleChoice | +| 209 | bigbench/elementary_math_qa | tasksource/bigbench | elementary_math_qa | | bigbench | MultipleChoice | +| 210 | bigbench/movie_recommendation | tasksource/bigbench | movie_recommendation | | bigbench | MultipleChoice | | 211 | bigbench/dyck_languages | tasksource/bigbench | dyck_languages | | bigbench | MultipleChoice | -| 212 | bigbench/real_or_fake_text | tasksource/bigbench | real_or_fake_text | | bigbench | MultipleChoice | -| 213 | bigbench/social_support | tasksource/bigbench | social_support | | bigbench | MultipleChoice | -| 214 | blimp/drop_argument | blimp | drop_argument | | blimp_hard | MultipleChoice | -| 215 | blimp/sentential_negation_npi_scope | blimp | sentential_negation_npi_scope | | blimp_hard | MultipleChoice | +| 212 | bigbench/cifar10_classification | tasksource/bigbench | cifar10_classification | | bigbench | MultipleChoice | +| 213 | bigbench/physics | tasksource/bigbench | physics | | bigbench | MultipleChoice | +| 214 | blimp/sentential_subject_island | blimp | sentential_subject_island | | blimp_hard | MultipleChoice | +| 215 | blimp/tough_vs_raising_1 | blimp | tough_vs_raising_1 | | blimp_hard | MultipleChoice | | 216 | blimp/wh_vs_that_with_gap | blimp | wh_vs_that_with_gap | | blimp_hard | MultipleChoice | -| 217 | blimp/inchoative | blimp | inchoative | | blimp_hard | MultipleChoice | -| 218 | blimp/existential_there_quantifiers_2 | blimp | existential_there_quantifiers_2 | | blimp_hard | MultipleChoice | -| 219 | blimp/sentential_subject_island | blimp | sentential_subject_island | | blimp_hard | MultipleChoice | -| 220 | blimp/superlative_quantifiers_1 | blimp | superlative_quantifiers_1 | | blimp_hard | MultipleChoice | -| 221 | blimp/matrix_question_npi_licensor_present | blimp | matrix_question_npi_licensor_present | | blimp_hard | MultipleChoice | -| 222 | blimp/principle_A_c_command | blimp | principle_A_c_command | | blimp_hard | MultipleChoice | -| 223 | blimp/npi_present_2 | blimp | npi_present_2 | | blimp_hard | MultipleChoice | -| 224 | blimp/complex_NP_island | blimp | complex_NP_island | | blimp_hard | MultipleChoice | -| 225 | blimp/coordinate_structure_constraint_object_extraction | blimp | coordinate_structure_constraint_object_extraction | | blimp_hard | MultipleChoice | -| 226 | blimp/principle_A_reconstruction | blimp | principle_A_reconstruction | | blimp_hard | MultipleChoice | -| 227 | blimp/tough_vs_raising_1 | blimp | tough_vs_raising_1 | | blimp_hard | MultipleChoice | -| 228 | blimp/principle_A_domain_2 | blimp | principle_A_domain_2 | | blimp_hard | MultipleChoice | -| 229 | blimp/wh_vs_that_with_gap_long_distance | blimp | wh_vs_that_with_gap_long_distance | | blimp_hard | MultipleChoice | +| 217 | blimp/wh_questions_object_gap | blimp | wh_questions_object_gap | | blimp_hard | MultipleChoice | +| 218 | blimp/principle_A_c_command | blimp | principle_A_c_command | | blimp_hard | MultipleChoice | +| 219 | blimp/existential_there_quantifiers_2 | blimp | existential_there_quantifiers_2 | | blimp_hard | MultipleChoice | +| 220 | blimp/principle_A_domain_2 | blimp | principle_A_domain_2 | | blimp_hard | MultipleChoice | +| 221 | blimp/left_branch_island_echo_question | blimp | left_branch_island_echo_question | | blimp_hard | MultipleChoice | +| 222 | blimp/npi_present_2 | blimp | npi_present_2 | | blimp_hard | MultipleChoice | +| 223 | blimp/complex_NP_island | blimp | complex_NP_island | | blimp_hard | MultipleChoice | +| 224 | blimp/animate_subject_passive | blimp | animate_subject_passive | | blimp_hard | MultipleChoice | +| 225 | blimp/coordinate_structure_constraint_complex_left_branch | blimp | coordinate_structure_constraint_complex_left_branch | | blimp_hard | MultipleChoice | +| 226 | blimp/coordinate_structure_constraint_object_extraction | blimp | coordinate_structure_constraint_object_extraction | | blimp_hard | MultipleChoice | +| 227 | blimp/superlative_quantifiers_1 | blimp | superlative_quantifiers_1 | | blimp_hard | MultipleChoice | +| 228 | blimp/drop_argument | blimp | drop_argument | | blimp_hard | MultipleChoice | +| 229 | blimp/sentential_negation_npi_scope | blimp | sentential_negation_npi_scope | | blimp_hard | MultipleChoice | | 230 | blimp/npi_present_1 | blimp | npi_present_1 | | blimp_hard | MultipleChoice | -| 231 | blimp/left_branch_island_echo_question | blimp | left_branch_island_echo_question | | blimp_hard | MultipleChoice | -| 232 | blimp/wh_questions_object_gap | blimp | wh_questions_object_gap | | blimp_hard | MultipleChoice | -| 233 | blimp/wh_questions_subject_gap_long_distance | blimp | wh_questions_subject_gap_long_distance | | blimp_hard | MultipleChoice | -| 234 | blimp/animate_subject_passive | blimp | animate_subject_passive | | blimp_hard | MultipleChoice | -| 235 | blimp/coordinate_structure_constraint_complex_left_branch | blimp | coordinate_structure_constraint_complex_left_branch | | blimp_hard | MultipleChoice | +| 231 | blimp/matrix_question_npi_licensor_present | blimp | matrix_question_npi_licensor_present | | blimp_hard | MultipleChoice | +| 232 | blimp/inchoative | blimp | inchoative | | blimp_hard | MultipleChoice | +| 233 | blimp/wh_vs_that_with_gap_long_distance | blimp | wh_vs_that_with_gap_long_distance | | blimp_hard | MultipleChoice | +| 234 | blimp/principle_A_reconstruction | blimp | principle_A_reconstruction | | blimp_hard | MultipleChoice | +| 235 | blimp/wh_questions_subject_gap_long_distance | blimp | wh_questions_subject_gap_long_distance | | blimp_hard | MultipleChoice | | 236 | cos_e/v1.0 | cos_e | v1.0 | | cos_e | MultipleChoice | | 237 | cosmos_qa | cosmos_qa | | | cosmos_qa | MultipleChoice | | 238 | dream | dream | | | dream | MultipleChoice | @@ -254,63 +254,63 @@ | 251 | balanced-copa | pkavumba/balanced-copa | | | balanced_copa | MultipleChoice | | 252 | e-CARE | 12ml/e-CARE | | | e_care | MultipleChoice | | 253 | art | art | | | art | MultipleChoice | -| 254 | mmlu/high_school_psychology | tasksource/mmlu | high_school_psychology | | mmlu | MultipleChoice | +| 254 | mmlu/prehistory | tasksource/mmlu | prehistory | | mmlu | MultipleChoice | | 255 | mmlu/clinical_knowledge | tasksource/mmlu | clinical_knowledge | | mmlu | MultipleChoice | | 256 | mmlu/college_biology | tasksource/mmlu | college_biology | | mmlu | MultipleChoice | | 257 | mmlu/college_chemistry | tasksource/mmlu | college_chemistry | | mmlu | MultipleChoice | | 258 | mmlu/college_computer_science | tasksource/mmlu | college_computer_science | | mmlu | MultipleChoice | -| 259 | mmlu/college_mathematics | tasksource/mmlu | college_mathematics | | mmlu | MultipleChoice | -| 260 | mmlu/college_medicine | tasksource/mmlu | college_medicine | | mmlu | MultipleChoice | -| 261 | mmlu/college_physics | tasksource/mmlu | college_physics | | mmlu | MultipleChoice | -| 262 | mmlu/computer_security | tasksource/mmlu | computer_security | | mmlu | MultipleChoice | -| 263 | mmlu/philosophy | tasksource/mmlu | philosophy | | mmlu | MultipleChoice | -| 264 | mmlu/medical_genetics | tasksource/mmlu | medical_genetics | | mmlu | MultipleChoice | -| 265 | mmlu/miscellaneous | tasksource/mmlu | miscellaneous | | mmlu | MultipleChoice | -| 266 | mmlu/moral_disputes | tasksource/mmlu | moral_disputes | | mmlu | MultipleChoice | -| 267 | mmlu/moral_scenarios | tasksource/mmlu | moral_scenarios | | mmlu | MultipleChoice | -| 268 | mmlu/nutrition | tasksource/mmlu | nutrition | | mmlu | MultipleChoice | -| 269 | mmlu/high_school_chemistry | tasksource/mmlu | high_school_chemistry | | mmlu | MultipleChoice | -| 270 | mmlu/high_school_statistics | tasksource/mmlu | high_school_statistics | | mmlu | MultipleChoice | -| 271 | mmlu/high_school_us_history | tasksource/mmlu | high_school_us_history | | mmlu | MultipleChoice | -| 272 | mmlu/professional_law | tasksource/mmlu | professional_law | | mmlu | MultipleChoice | -| 273 | mmlu/professional_medicine | tasksource/mmlu | professional_medicine | | mmlu | MultipleChoice | -| 274 | mmlu/professional_psychology | tasksource/mmlu | professional_psychology | | mmlu | MultipleChoice | -| 275 | mmlu/public_relations | tasksource/mmlu | public_relations | | mmlu | MultipleChoice | -| 276 | mmlu/security_studies | tasksource/mmlu | security_studies | | mmlu | MultipleChoice | -| 277 | mmlu/machine_learning | tasksource/mmlu | machine_learning | | mmlu | MultipleChoice | -| 278 | mmlu/management | tasksource/mmlu | management | | mmlu | MultipleChoice | -| 279 | mmlu/marketing | tasksource/mmlu | marketing | | mmlu | MultipleChoice | -| 280 | mmlu/sociology | tasksource/mmlu | sociology | | mmlu | MultipleChoice | -| 281 | mmlu/us_foreign_policy | tasksource/mmlu | us_foreign_policy | | mmlu | MultipleChoice | -| 282 | mmlu/virology | tasksource/mmlu | virology | | mmlu | MultipleChoice | -| 283 | mmlu/world_religions | tasksource/mmlu | world_religions | | mmlu | MultipleChoice | -| 284 | mmlu/high_school_biology | tasksource/mmlu | high_school_biology | | mmlu | MultipleChoice | -| 285 | mmlu/logical_fallacies | tasksource/mmlu | logical_fallacies | | mmlu | MultipleChoice | +| 259 | mmlu/medical_genetics | tasksource/mmlu | medical_genetics | | mmlu | MultipleChoice | +| 260 | mmlu/miscellaneous | tasksource/mmlu | miscellaneous | | mmlu | MultipleChoice | +| 261 | mmlu/moral_disputes | tasksource/mmlu | moral_disputes | | mmlu | MultipleChoice | +| 262 | mmlu/moral_scenarios | tasksource/mmlu | moral_scenarios | | mmlu | MultipleChoice | +| 263 | mmlu/high_school_physics | tasksource/mmlu | high_school_physics | | mmlu | MultipleChoice | +| 264 | mmlu/high_school_psychology | tasksource/mmlu | high_school_psychology | | mmlu | MultipleChoice | +| 265 | mmlu/high_school_statistics | tasksource/mmlu | high_school_statistics | | mmlu | MultipleChoice | +| 266 | mmlu/high_school_us_history | tasksource/mmlu | high_school_us_history | | mmlu | MultipleChoice | +| 267 | mmlu/college_mathematics | tasksource/mmlu | college_mathematics | | mmlu | MultipleChoice | +| 268 | mmlu/college_medicine | tasksource/mmlu | college_medicine | | mmlu | MultipleChoice | +| 269 | mmlu/college_physics | tasksource/mmlu | college_physics | | mmlu | MultipleChoice | +| 270 | mmlu/business_ethics | tasksource/mmlu | business_ethics | | mmlu | MultipleChoice | +| 271 | mmlu/professional_law | tasksource/mmlu | professional_law | | mmlu | MultipleChoice | +| 272 | mmlu/professional_medicine | tasksource/mmlu | professional_medicine | | mmlu | MultipleChoice | +| 273 | mmlu/professional_psychology | tasksource/mmlu | professional_psychology | | mmlu | MultipleChoice | +| 274 | mmlu/public_relations | tasksource/mmlu | public_relations | | mmlu | MultipleChoice | +| 275 | mmlu/logical_fallacies | tasksource/mmlu | logical_fallacies | | mmlu | MultipleChoice | +| 276 | mmlu/machine_learning | tasksource/mmlu | machine_learning | | mmlu | MultipleChoice | +| 277 | mmlu/management | tasksource/mmlu | management | | mmlu | MultipleChoice | +| 278 | mmlu/marketing | tasksource/mmlu | marketing | | mmlu | MultipleChoice | +| 279 | mmlu/sociology | tasksource/mmlu | sociology | | mmlu | MultipleChoice | +| 280 | mmlu/us_foreign_policy | tasksource/mmlu | us_foreign_policy | | mmlu | MultipleChoice | +| 281 | mmlu/virology | tasksource/mmlu | virology | | mmlu | MultipleChoice | +| 282 | mmlu/world_religions | tasksource/mmlu | world_religions | | mmlu | MultipleChoice | +| 283 | mmlu/nutrition | tasksource/mmlu | nutrition | | mmlu | MultipleChoice | +| 284 | mmlu/philosophy | tasksource/mmlu | philosophy | | mmlu | MultipleChoice | +| 285 | mmlu/high_school_biology | tasksource/mmlu | high_school_biology | | mmlu | MultipleChoice | | 286 | mmlu/jurisprudence | tasksource/mmlu | jurisprudence | | mmlu | MultipleChoice | | 287 | mmlu/international_law | tasksource/mmlu | international_law | | mmlu | MultipleChoice | -| 288 | mmlu/business_ethics | tasksource/mmlu | business_ethics | | mmlu | MultipleChoice | -| 289 | mmlu/astronomy | tasksource/mmlu | astronomy | | mmlu | MultipleChoice | -| 290 | mmlu/abstract_algebra | tasksource/mmlu | abstract_algebra | | mmlu | MultipleChoice | -| 291 | mmlu/professional_accounting | tasksource/mmlu | professional_accounting | | mmlu | MultipleChoice | -| 292 | mmlu/prehistory | tasksource/mmlu | prehistory | | mmlu | MultipleChoice | -| 293 | mmlu/anatomy | tasksource/mmlu | anatomy | | mmlu | MultipleChoice | -| 294 | mmlu/high_school_world_history | tasksource/mmlu | high_school_world_history | | mmlu | MultipleChoice | -| 295 | mmlu/human_aging | tasksource/mmlu | human_aging | | mmlu | MultipleChoice | -| 296 | mmlu/human_sexuality | tasksource/mmlu | human_sexuality | | mmlu | MultipleChoice | +| 288 | mmlu/anatomy | tasksource/mmlu | anatomy | | mmlu | MultipleChoice | +| 289 | mmlu/abstract_algebra | tasksource/mmlu | abstract_algebra | | mmlu | MultipleChoice | +| 290 | mmlu/security_studies | tasksource/mmlu | security_studies | | mmlu | MultipleChoice | +| 291 | mmlu/astronomy | tasksource/mmlu | astronomy | | mmlu | MultipleChoice | +| 292 | mmlu/professional_accounting | tasksource/mmlu | professional_accounting | | mmlu | MultipleChoice | +| 293 | mmlu/high_school_world_history | tasksource/mmlu | high_school_world_history | | mmlu | MultipleChoice | +| 294 | mmlu/human_aging | tasksource/mmlu | human_aging | | mmlu | MultipleChoice | +| 295 | mmlu/human_sexuality | tasksource/mmlu | human_sexuality | | mmlu | MultipleChoice | +| 296 | mmlu/high_school_chemistry | tasksource/mmlu | high_school_chemistry | | mmlu | MultipleChoice | | 297 | mmlu/high_school_computer_science | tasksource/mmlu | high_school_computer_science | | mmlu | MultipleChoice | -| 298 | mmlu/high_school_geography | tasksource/mmlu | high_school_geography | | mmlu | MultipleChoice | -| 299 | mmlu/global_facts | tasksource/mmlu | global_facts | | mmlu | MultipleChoice | -| 300 | mmlu/formal_logic | tasksource/mmlu | formal_logic | | mmlu | MultipleChoice | -| 301 | mmlu/elementary_mathematics | tasksource/mmlu | elementary_mathematics | | mmlu | MultipleChoice | -| 302 | mmlu/electrical_engineering | tasksource/mmlu | electrical_engineering | | mmlu | MultipleChoice | -| 303 | mmlu/econometrics | tasksource/mmlu | econometrics | | mmlu | MultipleChoice | -| 304 | mmlu/conceptual_physics | tasksource/mmlu | conceptual_physics | | mmlu | MultipleChoice | -| 305 | mmlu/high_school_physics | tasksource/mmlu | high_school_physics | | mmlu | MultipleChoice | -| 306 | mmlu/high_school_microeconomics | tasksource/mmlu | high_school_microeconomics | | mmlu | MultipleChoice | -| 307 | mmlu/high_school_mathematics | tasksource/mmlu | high_school_mathematics | | mmlu | MultipleChoice | -| 308 | mmlu/high_school_macroeconomics | tasksource/mmlu | high_school_macroeconomics | | mmlu | MultipleChoice | -| 309 | mmlu/high_school_government_and_politics | tasksource/mmlu | high_school_government_and_politics | | mmlu | MultipleChoice | -| 310 | mmlu/high_school_european_history | tasksource/mmlu | high_school_european_history | | mmlu | MultipleChoice | +| 298 | mmlu/high_school_european_history | tasksource/mmlu | high_school_european_history | | mmlu | MultipleChoice | +| 299 | mmlu/high_school_geography | tasksource/mmlu | high_school_geography | | mmlu | MultipleChoice | +| 300 | mmlu/high_school_government_and_politics | tasksource/mmlu | high_school_government_and_politics | | mmlu | MultipleChoice | +| 301 | mmlu/high_school_macroeconomics | tasksource/mmlu | high_school_macroeconomics | | mmlu | MultipleChoice | +| 302 | mmlu/high_school_mathematics | tasksource/mmlu | high_school_mathematics | | mmlu | MultipleChoice | +| 303 | mmlu/high_school_microeconomics | tasksource/mmlu | high_school_microeconomics | | mmlu | MultipleChoice | +| 304 | mmlu/computer_security | tasksource/mmlu | computer_security | | mmlu | MultipleChoice | +| 305 | mmlu/conceptual_physics | tasksource/mmlu | conceptual_physics | | mmlu | MultipleChoice | +| 306 | mmlu/econometrics | tasksource/mmlu | econometrics | | mmlu | MultipleChoice | +| 307 | mmlu/electrical_engineering | tasksource/mmlu | electrical_engineering | | mmlu | MultipleChoice | +| 308 | mmlu/elementary_mathematics | tasksource/mmlu | elementary_mathematics | | mmlu | MultipleChoice | +| 309 | mmlu/formal_logic | tasksource/mmlu | formal_logic | | mmlu | MultipleChoice | +| 310 | mmlu/global_facts | tasksource/mmlu | global_facts | | mmlu | MultipleChoice | | 311 | winogrande/winogrande_xl | winogrande | winogrande_xl | | winogrande | MultipleChoice | | 312 | codah/codah | codah | codah | | codah | MultipleChoice | | 313 | ai2_arc/ARC-Easy/challenge | ai2_arc | ARC-Easy | challenge | ai2_arc__challenge | MultipleChoice | @@ -343,34 +343,34 @@ | 340 | tweet_eval/stance_feminist | tweet_eval | stance_feminist | | tweet_eval_feminist | Classification | | 341 | tweet_eval/stance_hillary | tweet_eval | stance_hillary | | tweet_eval_hillary | Classification | | 342 | discovery/discovery | discovery | discovery | | discovery | Classification | -| 343 | pragmeval/emobank-valence | pragmeval | emobank-valence | | pragmeval_1 | Classification | -| 344 | pragmeval/squinky-informativeness | pragmeval | squinky-informativeness | | pragmeval_1 | Classification | -| 345 | pragmeval/squinky-implicature | pragmeval | squinky-implicature | | pragmeval_1 | Classification | -| 346 | pragmeval/mrda | pragmeval | mrda | | pragmeval_1 | Classification | -| 347 | pragmeval/switchboard | pragmeval | switchboard | | pragmeval_1 | Classification | -| 348 | pragmeval/squinky-formality | pragmeval | squinky-formality | | pragmeval_1 | Classification | -| 349 | pragmeval/emobank-arousal | pragmeval | emobank-arousal | | pragmeval_1 | Classification | -| 350 | pragmeval/emobank-dominance | pragmeval | emobank-dominance | | pragmeval_1 | Classification | -| 351 | pragmeval/verifiability | pragmeval | verifiability | | pragmeval_1 | Classification | -| 352 | pragmeval/persuasiveness-claimtype | pragmeval | persuasiveness-claimtype | | pragmeval_2 | Classification | -| 353 | pragmeval/persuasiveness-eloquence | pragmeval | persuasiveness-eloquence | | pragmeval_2 | Classification | -| 354 | pragmeval/persuasiveness-premisetype | pragmeval | persuasiveness-premisetype | | pragmeval_2 | Classification | -| 355 | pragmeval/pdtb | pragmeval | pdtb | | pragmeval_2 | Classification | -| 356 | pragmeval/persuasiveness-relevance | pragmeval | persuasiveness-relevance | | pragmeval_2 | Classification | +| 343 | pragmeval/emobank-dominance | pragmeval | emobank-dominance | | pragmeval_1 | Classification | +| 344 | pragmeval/emobank-valence | pragmeval | emobank-valence | | pragmeval_1 | Classification | +| 345 | pragmeval/squinky-formality | pragmeval | squinky-formality | | pragmeval_1 | Classification | +| 346 | pragmeval/squinky-implicature | pragmeval | squinky-implicature | | pragmeval_1 | Classification | +| 347 | pragmeval/squinky-informativeness | pragmeval | squinky-informativeness | | pragmeval_1 | Classification | +| 348 | pragmeval/switchboard | pragmeval | switchboard | | pragmeval_1 | Classification | +| 349 | pragmeval/mrda | pragmeval | mrda | | pragmeval_1 | Classification | +| 350 | pragmeval/verifiability | pragmeval | verifiability | | pragmeval_1 | Classification | +| 351 | pragmeval/emobank-arousal | pragmeval | emobank-arousal | | pragmeval_1 | Classification | +| 352 | pragmeval/persuasiveness-specificity | pragmeval | persuasiveness-specificity | | pragmeval_2 | Classification | +| 353 | pragmeval/persuasiveness-strength | pragmeval | persuasiveness-strength | | pragmeval_2 | Classification | +| 354 | pragmeval/persuasiveness-relevance | pragmeval | persuasiveness-relevance | | pragmeval_2 | Classification | +| 355 | pragmeval/stac | pragmeval | stac | | pragmeval_2 | Classification | +| 356 | pragmeval/pdtb | pragmeval | pdtb | | pragmeval_2 | Classification | | 357 | pragmeval/gum | pragmeval | gum | | pragmeval_2 | Classification | | 358 | pragmeval/emergent | pragmeval | emergent | | pragmeval_2 | Classification | -| 359 | pragmeval/stac | pragmeval | stac | | pragmeval_2 | Classification | +| 359 | pragmeval/persuasiveness-claimtype | pragmeval | persuasiveness-claimtype | | pragmeval_2 | Classification | | 360 | pragmeval/sarcasm | pragmeval | sarcasm | | pragmeval_2 | Classification | -| 361 | pragmeval/persuasiveness-strength | pragmeval | persuasiveness-strength | | pragmeval_2 | Classification | -| 362 | pragmeval/persuasiveness-specificity | pragmeval | persuasiveness-specificity | | pragmeval_2 | Classification | -| 363 | silicone/sem | silicone | sem | | silicone | Classification | -| 364 | silicone/iemocap | silicone | iemocap | | silicone | Classification | +| 361 | pragmeval/persuasiveness-premisetype | pragmeval | persuasiveness-premisetype | | pragmeval_2 | Classification | +| 362 | pragmeval/persuasiveness-eloquence | pragmeval | persuasiveness-eloquence | | pragmeval_2 | Classification | +| 363 | silicone/meld_s | silicone | meld_s | | silicone | Classification | +| 364 | silicone/maptask | silicone | maptask | | silicone | Classification | | 365 | silicone/dyda_e | silicone | dyda_e | | silicone | Classification | | 366 | silicone/dyda_da | silicone | dyda_da | | silicone | Classification | -| 367 | silicone/maptask | silicone | maptask | | silicone | Classification | -| 368 | silicone/meld_e | silicone | meld_e | | silicone | Classification | -| 369 | silicone/oasis | silicone | oasis | | silicone | Classification | -| 370 | silicone/meld_s | silicone | meld_s | | silicone | Classification | +| 367 | silicone/iemocap | silicone | iemocap | | silicone | Classification | +| 368 | silicone/oasis | silicone | oasis | | silicone | Classification | +| 369 | silicone/sem | silicone | sem | | silicone | Classification | +| 370 | silicone/meld_e | silicone | meld_e | | silicone | Classification | | 371 | lex_glue/eurlex | lex_glue | eurlex | | lex_glue___eurlex | Classification | | 372 | lex_glue/scotus | lex_glue | scotus | | lex_glue___scotus | Classification | | 373 | lex_glue/ledgar | lex_glue | ledgar | | lex_glue___ledgar | Classification | @@ -400,29 +400,29 @@ | 397 | go_emotions/simplified | go_emotions | simplified | | go_emotions___simplified | Classification | | 398 | scicite | allenai/scicite | | | scicite | Classification | | 399 | liar | liar | | | liar | Classification | -| 400 | lexical_relation_classification/K&H+N | relbert/lexical_relation_classification | K&H+N | | relbert_lexical_relation_classification | Classification | -| 401 | lexical_relation_classification/ROOT09 | relbert/lexical_relation_classification | ROOT09 | | relbert_lexical_relation_classification | Classification | -| 402 | lexical_relation_classification/CogALexV | relbert/lexical_relation_classification | CogALexV | | relbert_lexical_relation_classification | Classification | -| 403 | lexical_relation_classification/EVALution | relbert/lexical_relation_classification | EVALution | | relbert_lexical_relation_classification | Classification | +| 400 | lexical_relation_classification/ROOT09 | relbert/lexical_relation_classification | ROOT09 | | relbert_lexical_relation_classification | Classification | +| 401 | lexical_relation_classification/CogALexV | relbert/lexical_relation_classification | CogALexV | | relbert_lexical_relation_classification | Classification | +| 402 | lexical_relation_classification/EVALution | relbert/lexical_relation_classification | EVALution | | relbert_lexical_relation_classification | Classification | +| 403 | lexical_relation_classification/K&H+N | relbert/lexical_relation_classification | K&H+N | | relbert_lexical_relation_classification | Classification | | 404 | lexical_relation_classification/BLESS | relbert/lexical_relation_classification | BLESS | | relbert_lexical_relation_classification | Classification | -| 405 | linguisticprobing/top_constituents | tasksource/linguisticprobing | top_constituents | | linguisticprobing | Classification | -| 406 | linguisticprobing/obj_number | tasksource/linguisticprobing | obj_number | | linguisticprobing | Classification | -| 407 | linguisticprobing/subj_number | tasksource/linguisticprobing | subj_number | | linguisticprobing | Classification | -| 408 | linguisticprobing/tree_depth | tasksource/linguisticprobing | tree_depth | | linguisticprobing | Classification | -| 409 | linguisticprobing/past_present | tasksource/linguisticprobing | past_present | | linguisticprobing | Classification | -| 410 | linguisticprobing/odd_man_out | tasksource/linguisticprobing | odd_man_out | | linguisticprobing | Classification | -| 411 | linguisticprobing/coordination_inversion | tasksource/linguisticprobing | coordination_inversion | | linguisticprobing | Classification | -| 412 | linguisticprobing/bigram_shift | tasksource/linguisticprobing | bigram_shift | | linguisticprobing | Classification | -| 413 | linguisticprobing/sentence_length | tasksource/linguisticprobing | sentence_length | | linguisticprobing | Classification | -| 414 | crowdflower/economic-news | tasksource/crowdflower | economic-news | | crowdflower | Classification | +| 405 | linguisticprobing/sentence_length | tasksource/linguisticprobing | sentence_length | | linguisticprobing | Classification | +| 406 | linguisticprobing/top_constituents | tasksource/linguisticprobing | top_constituents | | linguisticprobing | Classification | +| 407 | linguisticprobing/past_present | tasksource/linguisticprobing | past_present | | linguisticprobing | Classification | +| 408 | linguisticprobing/obj_number | tasksource/linguisticprobing | obj_number | | linguisticprobing | Classification | +| 409 | linguisticprobing/subj_number | tasksource/linguisticprobing | subj_number | | linguisticprobing | Classification | +| 410 | linguisticprobing/tree_depth | tasksource/linguisticprobing | tree_depth | | linguisticprobing | Classification | +| 411 | linguisticprobing/bigram_shift | tasksource/linguisticprobing | bigram_shift | | linguisticprobing | Classification | +| 412 | linguisticprobing/coordination_inversion | tasksource/linguisticprobing | coordination_inversion | | linguisticprobing | Classification | +| 413 | linguisticprobing/odd_man_out | tasksource/linguisticprobing | odd_man_out | | linguisticprobing | Classification | +| 414 | crowdflower/political-media-bias | tasksource/crowdflower | political-media-bias | | crowdflower | Classification | | 415 | crowdflower/corporate-messaging | tasksource/crowdflower | corporate-messaging | | crowdflower | Classification | -| 416 | crowdflower/sentiment_nuclear_power | tasksource/crowdflower | sentiment_nuclear_power | | crowdflower | Classification | -| 417 | crowdflower/airline-sentiment | tasksource/crowdflower | airline-sentiment | | crowdflower | Classification | -| 418 | crowdflower/political-media-bias | tasksource/crowdflower | political-media-bias | | crowdflower | Classification | -| 419 | crowdflower/political-media-message | tasksource/crowdflower | political-media-message | | crowdflower | Classification | -| 420 | crowdflower/political-media-audience | tasksource/crowdflower | political-media-audience | | crowdflower | Classification | -| 421 | crowdflower/text_emotion | tasksource/crowdflower | text_emotion | | crowdflower | Classification | -| 422 | crowdflower/tweet_global_warming | tasksource/crowdflower | tweet_global_warming | | crowdflower | Classification | +| 416 | crowdflower/political-media-audience | tasksource/crowdflower | political-media-audience | | crowdflower | Classification | +| 417 | crowdflower/economic-news | tasksource/crowdflower | economic-news | | crowdflower | Classification | +| 418 | crowdflower/tweet_global_warming | tasksource/crowdflower | tweet_global_warming | | crowdflower | Classification | +| 419 | crowdflower/sentiment_nuclear_power | tasksource/crowdflower | sentiment_nuclear_power | | crowdflower | Classification | +| 420 | crowdflower/text_emotion | tasksource/crowdflower | text_emotion | | crowdflower | Classification | +| 421 | crowdflower/political-media-message | tasksource/crowdflower | political-media-message | | crowdflower | Classification | +| 422 | crowdflower/airline-sentiment | tasksource/crowdflower | airline-sentiment | | crowdflower | Classification | | 423 | ethics/commonsense | metaeval/ethics | commonsense | | ethics___commonsense | Classification | | 424 | ethics/deontology | metaeval/ethics | deontology | | ethics___deontology | Classification | | 425 | ethics/justice | metaeval/ethics | justice | | ethics___justice | Classification | @@ -484,14 +484,14 @@ | 481 | synthetic-instruct-gptj-pairwise | Dahoas/synthetic-instruct-gptj-pairwise | | | synthetic_instruct | MultipleChoice | | 482 | scruples | metaeval/scruples | | | scruples | Classification | | 483 | wouldyourather | metaeval/wouldyourather | | | wouldyourather | MultipleChoice | -| 484 | defeasible-nli/atomic | metaeval/defeasible-nli | atomic | | defeasible_nli | Classification | -| 485 | defeasible-nli/snli | metaeval/defeasible-nli | snli | | defeasible_nli | Classification | +| 484 | defeasible-nli/snli | metaeval/defeasible-nli | snli | | defeasible_nli | Classification | +| 485 | defeasible-nli/atomic | metaeval/defeasible-nli | atomic | | defeasible_nli | Classification | | 486 | help-nli | tasksource/help-nli | | | help_nli | Classification | | 487 | nli-veridicality-transitivity | metaeval/nli-veridicality-transitivity | | | nli_veridicality_transitivity | Classification | | 488 | lonli | tasksource/lonli | | | lonli | Classification | | 489 | dadc-limit-nli | tasksource/dadc-limit-nli | | | dadc_limit | Classification | | 490 | FLUTE | ColumbiaNLP/FLUTE | | | flute | Classification | -| 491 | strategy-qa | metaeval/strategy-qa | | | strategy_qa | Classification | +| 491 | strategy-qa | tasksource/strategy-qa | | | strategy_qa | Classification | | 492 | summarize_from_feedback/comparisons | openai/summarize_from_feedback | comparisons | | summarize_from_feedback | MultipleChoice | | 493 | folio | tasksource/folio | | | folio | Classification | | 494 | tomi-nli | tasksource/tomi-nli | | | tomi_nli | Classification | @@ -533,9 +533,9 @@ | 530 | oasst2_dense_flat/toxicity | tasksource/oasst2_dense_flat | | toxicity | oasst1__toxicity | Classification | | 531 | oasst2_dense_flat/helpfulness | tasksource/oasst2_dense_flat | | helpfulness | oasst1__helpfulness | Classification | | 532 | mindgames | sileod/mindgames | | | mindgames | Classification | -| 533 | universal_dependencies/en_gum/deprel | universal_dependencies | en_gum | deprel | udep__deprel | TokenClassification | +| 533 | universal_dependencies/en_partut/deprel | universal_dependencies | en_partut | deprel | udep__deprel | TokenClassification | | 534 | universal_dependencies/en_ewt/deprel | universal_dependencies | en_ewt | deprel | udep__deprel | TokenClassification | -| 535 | universal_dependencies/en_partut/deprel | universal_dependencies | en_partut | deprel | udep__deprel | TokenClassification | +| 535 | universal_dependencies/en_gum/deprel | universal_dependencies | en_gum | deprel | udep__deprel | TokenClassification | | 536 | universal_dependencies/en_lines/deprel | universal_dependencies | en_lines | deprel | udep__deprel | TokenClassification | | 537 | ambient | metaeval/ambient | | | ambient | Classification | | 538 | path-naturalness-prediction | metaeval/path-naturalness-prediction | | | path_naturalness | MultipleChoice | @@ -559,102 +559,102 @@ | 556 | sen-making/1 | tasksource/sen-making | | 1 | sen_making__1 | MultipleChoice | | 557 | sen-making/2 | tasksource/sen-making | | 2 | sen_making__2 | MultipleChoice | | 558 | winowhy | tasksource/winowhy | | | winowhy | Classification | -| 559 | mbib-base/cognitive-bias | mediabiasgroup/mbib-base | cognitive-bias | | mbib_cognitive_bias | Classification | -| 560 | mbib-base/fake-news | mediabiasgroup/mbib-base | fake-news | | mbib_fake_news | Classification | -| 561 | mbib-base/gender-bias | mediabiasgroup/mbib-base | gender-bias | | mbib_gender_bias | Classification | -| 562 | mbib-base/hate-speech | mediabiasgroup/mbib-base | hate-speech | | mbib_hate_speech | Classification | -| 563 | mbib-base/linguistic-bias | mediabiasgroup/mbib-base | linguistic-bias | | mbib_linguistic_bias | Classification | -| 564 | mbib-base/political-bias | mediabiasgroup/mbib-base | political-bias | | mbib_political_bias | Classification | -| 565 | mbib-base/racial-bias | mediabiasgroup/mbib-base | racial-bias | | mbib_racial_bias | Classification | -| 566 | mbib-base/text-level-bias | mediabiasgroup/mbib-base | text-level-bias | | mbib_text_level_bias | Classification | -| 567 | robustLR | tasksource/robustLR | | | robustLR | Classification | -| 568 | v1/gen_train234_test2to10 | CLUTRR/v1 | gen_train234_test2to10 | | cluttr | Classification | -| 569 | logical-fallacy | tasksource/logical-fallacy | | | logical_fallacy | Classification | -| 570 | parade | tasksource/parade | | | parade | Classification | -| 571 | cladder | tasksource/cladder | | | cladder | Classification | -| 572 | subjectivity | tasksource/subjectivity | | | subjectivity | Classification | -| 573 | MOH | tasksource/MOH | | | moh | Classification | -| 574 | VUAC | tasksource/VUAC | | | vuac | Classification | -| 575 | TroFi | tasksource/TroFi | | | trofi | Classification | -| 576 | sharc_modified/mod | sharc_modified | mod | | sharc_classification | Classification | -| 577 | conceptrules_v2 | tasksource/conceptrules_v2 | | | conceptrules_v2 | Classification | -| 578 | disrpt/eng.dep.scidtb.rels | metaeval/disrpt | eng.dep.scidtb.rels | | scidtb | Classification | -| 579 | conll2000 | conll2000 | | | chunking | TokenClassification | -| 580 | few-nerd/supervised | DFKI-SLT/few-nerd | supervised | | few_nerd | TokenClassification | -| 581 | finer-139 | nlpaueb/finer-139 | | | finer | TokenClassification | -| 582 | zero-shot-label-nli | tasksource/zero-shot-label-nli | | | label_nli | Classification | -| 583 | com2sense | tasksource/com2sense | | | com2sense | Classification | -| 584 | scone | tasksource/scone | | | scone | Classification | -| 585 | winodict | tasksource/winodict | | | winodict | MultipleChoice | -| 586 | fool-me-twice | tasksource/fool-me-twice | | | fool_me_twice | Classification | -| 587 | monli | tasksource/monli | | | monli | Classification | -| 588 | corr2cause | tasksource/corr2cause | | | causality | Classification | -| 589 | lsat_qa/all | lighteval/lsat_qa | all | | lsat | MultipleChoice | -| 590 | apt | tasksource/apt | | | apt | Classification | -| 591 | twitter-financial-news-sentiment | zeroshot/twitter-financial-news-sentiment | | | financial_sentiment | Classification | -| 592 | icl-symbol-tuning-instruct | tasksource/icl-symbol-tuning-instruct | | | icl | Classification | -| 593 | SpaceNLI | tasksource/SpaceNLI | | | space_nli | Classification | -| 594 | propsegment/nli | sihaochen/propsegment | nli | | propsegment | Classification | -| 595 | HatemojiBuild | HannahRoseKirk/HatemojiBuild | | | hatemoji | Classification | -| 596 | regset | tasksource/regset | | | regset | Classification | -| 597 | esci | tasksource/esci | | | esci | Classification | -| 598 | chatbot_arena_conversations | lmsys/chatbot_arena_conversations | | | chatbot_arena | MultipleChoice | -| 599 | dnd_style_intents | neurae/dnd_style_intents | | | dnd_intent | Classification | -| 600 | FLD.v2/default | hitachi-nlp/FLD.v2 | default | | fld | Classification | -| 601 | FLD.v2/star | hitachi-nlp/FLD.v2 | star | | flds | Classification | -| 602 | SDOH-NLI | tasksource/SDOH-NLI | | | sdoh_nli | Classification | -| 603 | scifact_entailment | allenai/scifact_entailment | | | scifact_entailment | Classification | -| 604 | feasibilityQA | tasksource/feasibilityQA | | | feasibilityQA | Classification | -| 605 | simple_pair | tasksource/simple_pair | | | simple_pair | Classification | -| 606 | AdjectiveScaleProbe-nli | tasksource/AdjectiveScaleProbe-nli | | | adjective_scale_probe | Classification | -| 607 | resnli | tasksource/resnli | | | repectively_nli | Classification | -| 608 | SpaRTUN | tasksource/SpaRTUN | | | spartun | MultipleChoice | -| 609 | ReSQ | tasksource/ReSQ | | | resq | MultipleChoice | -| 610 | semantic_fragments_nli | tasksource/semantic_fragments_nli | | | semantic_fragments_nli | Classification | -| 611 | dataset_train_nli | MoritzLaurer/dataset_train_nli | | | moritz_zs_nli | Classification | -| 612 | stepgame | tasksource/stepgame | | | stepgame | Classification | -| 613 | nlgraph | tasksource/nlgraph | | | nlgraph | Classification | -| 614 | oasst2_pairwise_rlhf_reward | tasksource/oasst2_pairwise_rlhf_reward | | | oasst_rlhf | MultipleChoice | -| 615 | hh-rlhf/helpful-online | tasksource/hh-rlhf | helpful-online | | anthropic_rlhf_helpfulness | MultipleChoice | -| 616 | hh-rlhf/helpful-base | tasksource/hh-rlhf | helpful-base | | anthropic_rlhf_helpfulness | MultipleChoice | -| 617 | hh-rlhf/helpful-rejection-sampled | tasksource/hh-rlhf | helpful-rejection-sampled | | anthropic_rlhf_helpfulness | MultipleChoice | -| 618 | hh-rlhf/harmless-base | tasksource/hh-rlhf | harmless-base | | anthropic_rlhf_harmless | MultipleChoice | -| 619 | ruletaker | tasksource/ruletaker | | | ruletaker | Classification | -| 620 | PARARULE-Plus | qbao775/PARARULE-Plus | | | para_rules | Classification | -| 621 | proofwriter | tasksource/proofwriter | | | proofwriter_deduction | Classification | -| 622 | logical-entailment | tasksource/logical-entailment | | | logical_entailment | Classification | -| 623 | nope | tasksource/nope | | | nope | Classification | -| 624 | LogicNLI | tasksource/LogicNLI | | | logicNLI | Classification | -| 625 | contract-nli/contractnli_a | kiddothe2b/contract-nli | contractnli_a | | contract_nli | Classification | -| 626 | nli4ct_semeval2024 | AshtonIsNotHere/nli4ct_semeval2024 | | | nli4ct | Classification | -| 627 | lsat-ar | tasksource/lsat-ar | | | lsat_ar | MultipleChoice | -| 628 | lsat-rc | tasksource/lsat-rc | | | lsat_rc | MultipleChoice | -| 629 | biosift-nli | AshtonIsNotHere/biosift-nli | | | biosift_nli | Classification | -| 630 | brainteasers/WP | tasksource/brainteasers | WP | | brainteasers | MultipleChoice | -| 631 | brainteasers/SP | tasksource/brainteasers | SP | | brainteasers | MultipleChoice | -| 632 | persuasion | Anthropic/persuasion | | | persuasiveness | Classification | -| 633 | AmbigNQ-clarifying-question | erbacher/AmbigNQ-clarifying-question | | | ambigNQ | Classification | -| 634 | SIGA-nli | tasksource/SIGA-nli | | | siga_nli | Classification | -| 635 | FOL-nli | unigram/FOL-nli | | | unigram_fol | Classification | -| 636 | goal-step-wikihow/order | tasksource/goal-step-wikihow | order | | gs_order | MultipleChoice | -| 637 | PARADISE | GGLab/PARADISE | | | paradise | MultipleChoice | -| 638 | doc-nli | tasksource/doc-nli | | | docnli | Classification | -| 639 | mctest-nli | tasksource/mctest-nli | | | mctest_nli | Classification | -| 640 | patent-phrase-similarity | tasksource/patent-phrase-similarity | | | patent_phrase_similarity | Classification | -| 641 | natural-language-satisfiability | tasksource/natural-language-satisfiability | | | nlsat | Classification | -| 642 | idioms-nli | tasksource/idioms-nli | | | idioms_nli | Classification | -| 643 | lifecycle-entailment | tasksource/lifecycle-entailment | | | lifeycle_entailment | Classification | -| 644 | HelpSteer2/helpfulness | nvidia/HelpSteer2 | | helpfulness | helpsteer_2__helpfulness | Classification | -| 645 | HelpSteer2/correctness | nvidia/HelpSteer2 | | correctness | helpsteer_2__correctness | Classification | -| 646 | HelpSteer2/coherence | nvidia/HelpSteer2 | | coherence | helpsteer_2__coherence | Classification | -| 647 | HelpSteer2/complexity | nvidia/HelpSteer2 | | complexity | helpsteer_2__complexity | Classification | -| 648 | HelpSteer2/verbosity | nvidia/HelpSteer2 | | verbosity | helpsteer_2__verbosity | Classification | -| 649 | HelpSteer/helpfulness | nvidia/HelpSteer | | helpfulness | helpsteer__helpfulness | Classification | -| 650 | HelpSteer/correctness | nvidia/HelpSteer | | correctness | helpsteer__correctness | Classification | -| 651 | HelpSteer/coherence | nvidia/HelpSteer | | coherence | helpsteer__coherence | Classification | -| 652 | HelpSteer/complexity | nvidia/HelpSteer | | complexity | helpsteer__complexity | Classification | -| 653 | HelpSteer/verbosity | nvidia/HelpSteer | | verbosity | helpsteer__verbosity | Classification | -| 654 | MSciNLI | sadat2307/MSciNLI | | | msci_nli | Classification | -| 655 | lex_glue/ecthr_a | coastalcph/lex_glue | ecthr_a | | lex_glue___ecthr_a | Classification | -| 656 | lex_glue/ecthr_b | lex_glue | ecthr_b | | lex_glue___ecthr_b | Classification | -| 657 | UltraFeedback-paired | pushpdeep/UltraFeedback-paired | | | ultrafeedback | MultipleChoice | +| 559 | robustLR | tasksource/robustLR | | | robustLR | Classification | +| 560 | v1/gen_train234_test2to10 | CLUTRR/v1 | gen_train234_test2to10 | | cluttr | Classification | +| 561 | logical-fallacy | tasksource/logical-fallacy | | | logical_fallacy | Classification | +| 562 | parade | tasksource/parade | | | parade | Classification | +| 563 | cladder | tasksource/cladder | | | cladder | Classification | +| 564 | subjectivity | tasksource/subjectivity | | | subjectivity | Classification | +| 565 | MOH | tasksource/MOH | | | moh | Classification | +| 566 | VUAC | tasksource/VUAC | | | vuac | Classification | +| 567 | TroFi | tasksource/TroFi | | | trofi | Classification | +| 568 | sharc_modified/mod | sharc_modified | mod | | sharc_classification | Classification | +| 569 | conceptrules_v2 | tasksource/conceptrules_v2 | | | conceptrules_v2 | Classification | +| 570 | disrpt/eng.dep.scidtb.rels | metaeval/disrpt | eng.dep.scidtb.rels | | scidtb | Classification | +| 571 | conll2000 | conll2000 | | | chunking | TokenClassification | +| 572 | few-nerd/supervised | DFKI-SLT/few-nerd | supervised | | few_nerd | TokenClassification | +| 573 | finer-139 | nlpaueb/finer-139 | | | finer | TokenClassification | +| 574 | zero-shot-label-nli | tasksource/zero-shot-label-nli | | | label_nli | Classification | +| 575 | com2sense | tasksource/com2sense | | | com2sense | Classification | +| 576 | scone | tasksource/scone | | | scone | Classification | +| 577 | winodict | tasksource/winodict | | | winodict | MultipleChoice | +| 578 | fool-me-twice | tasksource/fool-me-twice | | | fool_me_twice | Classification | +| 579 | monli | tasksource/monli | | | monli | Classification | +| 580 | corr2cause | tasksource/corr2cause | | | causality | Classification | +| 581 | lsat_qa/all | lighteval/lsat_qa | all | | lsat | MultipleChoice | +| 582 | apt | tasksource/apt | | | apt | Classification | +| 583 | twitter-financial-news-sentiment | zeroshot/twitter-financial-news-sentiment | | | financial_sentiment | Classification | +| 584 | icl-symbol-tuning-instruct | tasksource/icl-symbol-tuning-instruct | | | icl | Classification | +| 585 | SpaceNLI | tasksource/SpaceNLI | | | space_nli | Classification | +| 586 | propsegment/nli | sihaochen/propsegment | nli | | propsegment | Classification | +| 587 | HatemojiBuild | HannahRoseKirk/HatemojiBuild | | | hatemoji | Classification | +| 588 | regset | tasksource/regset | | | regset | Classification | +| 589 | esci | tasksource/esci | | | esci | Classification | +| 590 | chatbot_arena_conversations | lmsys/chatbot_arena_conversations | | | chatbot_arena | MultipleChoice | +| 591 | dnd_style_intents | neurae/dnd_style_intents | | | dnd_intent | Classification | +| 592 | FLD.v2/default | hitachi-nlp/FLD.v2 | default | | fld | Classification | +| 593 | FLD.v2/star | hitachi-nlp/FLD.v2 | star | | flds | Classification | +| 594 | SDOH-NLI | tasksource/SDOH-NLI | | | sdoh_nli | Classification | +| 595 | scifact_entailment | allenai/scifact_entailment | | | scifact_entailment | Classification | +| 596 | feasibilityQA | tasksource/feasibilityQA | | | feasibilityQA | Classification | +| 597 | simple_pair | tasksource/simple_pair | | | simple_pair | Classification | +| 598 | AdjectiveScaleProbe-nli | tasksource/AdjectiveScaleProbe-nli | | | adjective_scale_probe | Classification | +| 599 | resnli | tasksource/resnli | | | repectively_nli | Classification | +| 600 | SpaRTUN | tasksource/SpaRTUN | | | spartun | MultipleChoice | +| 601 | ReSQ | tasksource/ReSQ | | | resq | MultipleChoice | +| 602 | semantic_fragments_nli | tasksource/semantic_fragments_nli | | | semantic_fragments_nli | Classification | +| 603 | dataset_train_nli | MoritzLaurer/dataset_train_nli | | | moritz_zs_nli | Classification | +| 604 | stepgame | tasksource/stepgame | | | stepgame | Classification | +| 605 | nlgraph | tasksource/nlgraph | | | nlgraph | Classification | +| 606 | oasst2_pairwise_rlhf_reward | tasksource/oasst2_pairwise_rlhf_reward | | | oasst_rlhf | MultipleChoice | +| 607 | hh-rlhf/helpful-rejection-sampled | tasksource/hh-rlhf | helpful-rejection-sampled | | anthropic_rlhf_helpfulness | MultipleChoice | +| 608 | hh-rlhf/helpful-online | tasksource/hh-rlhf | helpful-online | | anthropic_rlhf_helpfulness | MultipleChoice | +| 609 | hh-rlhf/helpful-base | tasksource/hh-rlhf | helpful-base | | anthropic_rlhf_helpfulness | MultipleChoice | +| 610 | hh-rlhf/harmless-base | tasksource/hh-rlhf | harmless-base | | anthropic_rlhf_harmless | MultipleChoice | +| 611 | ruletaker | tasksource/ruletaker | | | ruletaker | Classification | +| 612 | PARARULE-Plus | qbao775/PARARULE-Plus | | | para_rules | Classification | +| 613 | proofwriter | tasksource/proofwriter | | | proofwriter_deduction | Classification | +| 614 | logical-entailment | tasksource/logical-entailment | | | logical_entailment | Classification | +| 615 | nope | tasksource/nope | | | nope | Classification | +| 616 | LogicNLI | tasksource/LogicNLI | | | logicNLI | Classification | +| 617 | contract-nli/contractnli_a | kiddothe2b/contract-nli | contractnli_a | | contract_nli | Classification | +| 618 | nli4ct_semeval2024 | AshtonIsNotHere/nli4ct_semeval2024 | | | nli4ct | Classification | +| 619 | lsat-ar | tasksource/lsat-ar | | | lsat_ar | MultipleChoice | +| 620 | lsat-rc | tasksource/lsat-rc | | | lsat_rc | MultipleChoice | +| 621 | biosift-nli | AshtonIsNotHere/biosift-nli | | | biosift_nli | Classification | +| 622 | brainteasers/SP | tasksource/brainteasers | SP | | brainteasers | MultipleChoice | +| 623 | brainteasers/WP | tasksource/brainteasers | WP | | brainteasers | MultipleChoice | +| 624 | persuasion | Anthropic/persuasion | | | persuasiveness | Classification | +| 625 | AmbigNQ-clarifying-question | erbacher/AmbigNQ-clarifying-question | | | ambigNQ | Classification | +| 626 | SIGA-nli | tasksource/SIGA-nli | | | siga_nli | Classification | +| 627 | FOL-nli | unigram/FOL-nli | | | unigram_fol | Classification | +| 628 | goal-step-wikihow/order | tasksource/goal-step-wikihow | order | | gs_order | MultipleChoice | +| 629 | PARADISE | GGLab/PARADISE | | | paradise | MultipleChoice | +| 630 | doc-nli | tasksource/doc-nli | | | docnli | Classification | +| 631 | mctest-nli | tasksource/mctest-nli | | | mctest_nli | Classification | +| 632 | patent-phrase-similarity | tasksource/patent-phrase-similarity | | | patent_phrase_similarity | Classification | +| 633 | natural-language-satisfiability | tasksource/natural-language-satisfiability | | | nlsat | Classification | +| 634 | idioms-nli | tasksource/idioms-nli | | | idioms_nli | Classification | +| 635 | lifecycle-entailment | tasksource/lifecycle-entailment | | | lifeycle_entailment | Classification | +| 636 | HelpSteer2/helpfulness | nvidia/HelpSteer2 | | helpfulness | helpsteer_2__helpfulness | Classification | +| 637 | HelpSteer2/correctness | nvidia/HelpSteer2 | | correctness | helpsteer_2__correctness | Classification | +| 638 | HelpSteer2/coherence | nvidia/HelpSteer2 | | coherence | helpsteer_2__coherence | Classification | +| 639 | HelpSteer2/complexity | nvidia/HelpSteer2 | | complexity | helpsteer_2__complexity | Classification | +| 640 | HelpSteer2/verbosity | nvidia/HelpSteer2 | | verbosity | helpsteer_2__verbosity | Classification | +| 641 | HelpSteer/helpfulness | nvidia/HelpSteer | | helpfulness | helpsteer__helpfulness | Classification | +| 642 | HelpSteer/correctness | nvidia/HelpSteer | | correctness | helpsteer__correctness | Classification | +| 643 | HelpSteer/coherence | nvidia/HelpSteer | | coherence | helpsteer__coherence | Classification | +| 644 | HelpSteer/complexity | nvidia/HelpSteer | | complexity | helpsteer__complexity | Classification | +| 645 | HelpSteer/verbosity | nvidia/HelpSteer | | verbosity | helpsteer__verbosity | Classification | +| 646 | MSciNLI | sadat2307/MSciNLI | | | msci_nli | Classification | +| 647 | lex_glue/ecthr_a | coastalcph/lex_glue | ecthr_a | | lex_glue___ecthr_a | Classification | +| 648 | lex_glue/ecthr_b | lex_glue | ecthr_b | | lex_glue___ecthr_b | Classification | +| 649 | UltraFeedback-paired | pushpdeep/UltraFeedback-paired | | | ultrafeedback | MultipleChoice | +| 650 | AES2-essay-scoring | tasksource/AES2-essay-scoring | | | essay_scoring | Classification | +| 651 | argument-feedback | tasksource/argument-feedback | | | argument_feedback | Classification | +| 652 | english-grading/cohesion | tasksource/english-grading | | cohesion | grading__cohesion | Classification | +| 653 | english-grading/syntax | tasksource/english-grading | | syntax | grading__syntax | Classification | +| 654 | english-grading/vocabulary | tasksource/english-grading | | vocabulary | grading__vocabulary | Classification | +| 655 | english-grading/phraseology | tasksource/english-grading | | phraseology | grading__phraseology | Classification | +| 656 | english-grading/grammar | tasksource/english-grading | | grammar | grading__grammar | Classification | +| 657 | english-grading/conventions | tasksource/english-grading | | conventions | grading__conventions | Classification |