diff --git a/.gitignore b/.gitignore
index 9de499c0..39b0ab5b 100755
--- a/.gitignore
+++ b/.gitignore
@@ -152,4 +152,6 @@
 _proc
 _docs
 lepiszcze-submissions/
-checkpoints/
\ No newline at end of file
+checkpoints/
+# Local Netlify folder
+.netlify
diff --git a/webpage/.netlify/edge-functions-import-map.json b/webpage/.netlify/edge-functions-import-map.json
new file mode 100644
index 00000000..1b91bef4
--- /dev/null
+++ b/webpage/.netlify/edge-functions-import-map.json
@@ -0,0 +1 @@
+{"imports":{"@netlify/edge-functions":"https://edge.netlify.com/v1/index.ts","netlify:edge":"https://edge.netlify.com/v1/index.ts?v=legacy"},"scopes":{}}
\ No newline at end of file
diff --git a/webpage/content/tasks/taskIR.md b/webpage/content/tasks/taskIR.md
new file mode 100644
index 00000000..5789951d
--- /dev/null
+++ b/webpage/content/tasks/taskIR.md
@@ -0,0 +1,11 @@
+---
+url: "/tasks/InformationRetrieval"
+type: docs
+geekdocNav: false
+geekdocBreadcrumb: false
+---
+
+{{< pageHeader >}}
+{{< info taskname="Information Retrieval" taskdesc="Information Retrieval (IR) is the process of locating and retrieving relevant data or documents from a large dataset based on user queries, often used in search engines">}}
+{{< averageResults tasktype="Information Retrieval" >}}
+{{< results type="Information Retrieval" >}}
\ No newline at end of file
diff --git a/webpage/content/tasks/taskQA.md b/webpage/content/tasks/taskQA.md
new file mode 100644
index 00000000..a4b8f963
--- /dev/null
+++ b/webpage/content/tasks/taskQA.md
@@ -0,0 +1,11 @@
+---
+url: "/tasks/QuestionAnswering"
+type: docs
+geekdocNav: false
+geekdocBreadcrumb: false
+---
+
+{{< pageHeader >}}
+{{< info taskname="Question Answering" taskdesc="Extractive Question Answering identifies precise spans within a document to answer a query, without generating new text or rephrasing">}}
+{{< averageResults tasktype="Question Answering">}}
+{{< results type="Question Answering" >}}
diff --git a/webpage/data/results/msmarco_bm_25.json b/webpage/data/results/msmarco_bm_25.json
new file mode 100644
index 00000000..f114d5d4
--- /dev/null
+++ b/webpage/data/results/msmarco_bm_25.json
@@ -0,0 +1,25 @@
+{
+    "submission_name": "msmarco_bm_25",
+    "dataset_name": "MSMARCO",
+    "dataset_version": "0.0.0",
+    "embedding_name": "BM25",
+    "hparams": {},
+    "packages": [],
+    "config": null,
+    "leaderboard_task_name": "Information Retrieval",
+    "metrics": [
+        {
+            "NDCG@10": 31.50,
+            "MRR@10": 56.36
+        }
+    ],
+    "metrics_avg": {
+        "NDCG@10": 31.50,
+        "MRR@10": 56.36
+    },
+    "metrics_std": {
+        "NDCG@10": 0.0,
+        "MRR@10": 0.0
+    },
+    "averaged_over": 1
+}
\ No newline at end of file
diff --git a/webpage/data/results/msmarco_ict.json b/webpage/data/results/msmarco_ict.json
new file mode 100644
index 00000000..f1b5915e
--- /dev/null
+++ b/webpage/data/results/msmarco_ict.json
@@ -0,0 +1,25 @@
+{
+    "submission_name": "msmarco_ict",
+    "dataset_name": "MSMARCO",
+    "dataset_version": "0.0.0",
+    "embedding_name": "ICT",
+    "hparams": {},
+    "packages": [],
+    "config": null,
+    "leaderboard_task_name": "Information Retrieval",
+    "metrics": [
+        {
+            "NDCG@10": 29.02,
+            "MRR@10": 43.91
+        }
+    ],
+    "metrics_avg": {
+        "NDCG@10": 29.02,
+        "MRR@10": 43.91
+    },
+    "metrics_std": {
+        "NDCG@10": 0.0,
+        "MRR@10": 0.0
+    },
+    "averaged_over": 1
+}
\ No newline at end of file
diff --git a/webpage/data/results/qa_all_allegro__herbert-base-cased.json b/webpage/data/results/qa_all_allegro__herbert-base-cased.json
new file mode 100644
index 00000000..10a43544
--- /dev/null
+++ b/webpage/data/results/qa_all_allegro__herbert-base-cased.json
@@ -0,0 +1,34 @@
+{
+    "submission_name": "qa_all_allegro__herbert-base-cased",
+    "dataset_name": "qa_all",
+    "dataset_version": "0.0.0",
+    "embedding_name": "allegro/herbert-base-cased",
+    "hparams": {},
+    "packages": [],
+    "config": null,
+    "leaderboard_task_name": "Question Answering",
+    "metrics": [
+        {
+            "f1": 81.06677705918307,
+            "exact": 72.24858757062147,
+            "HasAns_f1": 72.2968454891892,
+            "HasAns_exact": 56.738437001594896,
+            "NoAns_f1": 92.54042775169536
+        }
+    ],
+    "metrics_avg": {
+        "f1": 81.06677705918307,
+        "exact": 72.24858757062147,
+        "HasAns_f1": 72.2968454891892,
+        "HasAns_exact": 56.738437001594896,
+        "NoAns_f1": 92.54042775169536
+    },
+    "metrics_std": {
+        "f1": 0,
+        "exact": 0,
+        "HasAns_f1": 0,
+        "HasAns_exact": 0,
+        "NoAns_f1": 0
+    },
+    "averaged_over": 1
+}
diff --git a/webpage/layouts/shortcodes/averageResults.html b/webpage/layouts/shortcodes/averageResults.html
index 0abd7fcb..b341b841 100644
--- a/webpage/layouts/shortcodes/averageResults.html
+++ b/webpage/layouts/shortcodes/averageResults.html
@@ -7,12 +7,16 @@
 {{ $metrics := slice }}
 {{ $metricsToIgnore := slice "classes" "f1_weighted" "recall_weighted" "precision_weighted" }}

-{{ range $.Site.Data.results }}
-  {{ range $k, $v := .metrics_avg }}
-    {{if (not (in $metricsToIgnore $k)) }}
-      {{ $metrics = uniq ($metrics | append $k) }}
-    {{end}}
-  {{ end }}
+{{ range $file, $result := $.Site.Data.results }}
+  {{if or (eq $tasktype "Tasks") (eq (index $result "leaderboard_task_name") $tasktype)}}
+    {{if (not (or (eq (index $result "leaderboard_task_name") "Information Retrieval") (eq (index $result "leaderboard_task_name") "Question Answering")))}}
+      {{ range $k, $v := .metrics_avg }}
+        {{if (not (in $metricsToIgnore $k)) }}
+          {{ $metrics = uniq ($metrics | append $k) }}
+        {{end}}
+      {{ end }}
+    {{end}}
+  {{end}}
 {{ end }}

 {{ $sum := slice }}
@@ ... @@
-
-  {{ if eq $tasktype "Tasks"}}
+
+  {{ if eq $tasktype "Tasks"}}
     Model ranking
     Average model performance across all tasks.
-  {{else}}
-    Model's task ranking
-    Average model performance across task datasets.
+  {{else}}
+    {{if (not (or (eq $tasktype "Information Retrieval") (eq $tasktype "Question Answering")))}}
+      Model's task ranking
+      Average model performance across task datasets.
+    {{end}}
+  {{end}}
+  {{if (not (or (eq $tasktype "Information Retrieval") (eq $tasktype "Question Answering")))}}