List of Tasks
The list below links to data statements [1, 2] for each of the datasets that are part of GEM tasks. The template used to produce the initial statements and a guide on how to write them can be found here: [download template] [view guide]. We have released an extended version of this template and an interactive collection tool.
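All GEM tasks share a common loading interface. Below is a minimal sketch, assuming the tasks are accessible through the Hugging Face `datasets` library under the `gem` dataset name; the config name (`e2e_nlg`) and the field names used here (`target`, `references`) follow the GEM dataset cards but may differ between tasks.

```python
# Minimal sketch: loading one GEM task via the Hugging Face `datasets` library.
# Assumptions: the "gem" dataset is available on the Hub, "e2e_nlg" is a valid
# config name, and examples expose "target" / "references" fields.
from datasets import load_dataset

data = load_dataset("gem", "e2e_nlg")    # other configs: "common_gen", "xsum", ...
example = data["validation"][0]

print(example["target"])                 # a single reference string
print(example.get("references", []))     # additional references, where provided
```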
- conversational_weather (Data-to-Text | English): The purpose of this dataset is to assess how well a model can learn a template-like structure in a very low data setting. The task here is to produce a response to a weather-related query. The reply is further specified through the data attributes and discourse structure in the input. The output contains both the lexicalized text and discourse markers for attributes (e.g., `_ARG_TEMP_ 34`).
- dart (Data-to-Text | English): DART is an English dataset aggregating multiple other data-to-text datasets in a common triple-based format. The new format is completely flat, thus not requiring a model to learn hierarchical structures, while still retaining the full information.
- e2e_nlg (Data-to-Text | English): The E2E NLG dataset is an English benchmark dataset for data-to-text models that verbalize a set of 2-9 key-value attribute pairs in the restaurant domain. The version used for GEM is the cleaned E2E NLG dataset, which filters examples with hallucinations and outputs that don't fully cover all input attributes. An illustrative input/output pair is sketched after this list.
- mlb_data_to_text (Data-to-Text | English): The MLB dataset is an English sport-related data-to-text dataset in the baseball domain. The input is a large table with results of a game and the output is a description of the game.
- RotoWire_English-German (Data-to-Text | English, German): This dataset is a data-to-text dataset in the basketball domain. The inputs are tables in a fixed format with statistics about a game (in English) and the target is a German translation of the originally English game description. The translations were done by professional translators with basketball experience. The dataset can be used to evaluate the cross-lingual data-to-text capabilities of a model with complex inputs.
- sportsett_basketball (Data-to-Text | English): The SportSett dataset is an English data-to-text dataset in the basketball domain. The inputs are statistics summarizing an NBA game and the outputs are high-quality descriptions of the game in natural language.
- surface_realisation_st_2020 (Data-to-Text | Arabic, Chinese, English, French, Hindi, Indonesian, Japanese, Korean, Portuguese, Russian, Spanish): This dataset was used as part of the multilingual surface realization shared task, in which a model gets full or partial universal dependency structures and has to reconstruct the natural language text. The dataset supports 11 languages.
- totto (Data-to-Text | English): ToTTo is a high-quality English table-to-text dataset with more than 100,000 examples in which a table from Wikipedia with highlighted cells is paired with a sentence that describes the highlighted cells. All examples in the dataset were post-edited in multiple steps to ensure that the targets are fully faithful to the input information.
- turku_hockey_data2text (Data-to-Text | Finnish): This is a Finnish data-to-text dataset in which the input is structured information about a hockey game and the output is a description of the game.
- viggo (Data-to-Text | English): ViGGO is an English data-to-text generation dataset in the video game domain, with target responses being more conversational than information-seeking, yet constrained to the information presented in a meaning representation. The dataset is relatively small with about 5,000 examples but very clean, and can thus serve for evaluating transfer learning, low-resource, or few-shot capabilities of neural models.
- web_nlg (Data-to-Text | Russian, English): WebNLG is a bilingual dataset (English, Russian) of parallel DBpedia triple sets and short texts that cover about 450 different DBpedia properties. The WebNLG data was originally created to promote the development of RDF verbalisers able to generate short text and to handle micro-planning (i.e., sentence segmentation and ordering, referring expression generation, aggregation); the goal of the task is to generate texts starting from 1 to 7 input triples which have entities in common (so the input is actually a connected knowledge graph). The dataset contains about 17,000 triple sets and 45,000 crowdsourced texts in English, and 7,000 triple sets and 19,000 crowdsourced texts in Russian. A challenging test set section with entities and/or properties that have not been seen at training time is available. An illustrative triple set is sketched after this list.
- CrossWOZ (Dialog Response Generation | Chinese): CrossWOZ is a Chinese multi-domain task-oriented dialogue dataset. It contains 6K dialogue sessions and 102K utterances for 5 domains, including hotel, restaurant, attraction, metro, and taxi. About 60% of the dialogues have cross-domain user goals that favor inter-domain dependency and encourage natural transition across domains in conversation.
- cs_restaurants (Dialog Response Generation | Czech): The Czech Restaurants dataset is a task-oriented dialog dataset in which a model needs to verbalize a response that a service agent could provide, specified through a series of dialog acts. The dataset originated as a translation of an English dataset to test the generation capabilities of an NLG system on a highly morphologically rich language like Czech.
- dstc10_track2_task2 (Dialog Response Generation | English): The DSTC10 Track 2 Task 2 follows the DSTC9 Track 1 task, in which participants have to implement knowledge-grounded dialog systems. The training dataset is inherited from the DSTC9 challenge and is in the written domain, while the test set is newly collected and consists of noisy ASR transcripts. Hence, the dataset facilitates building models for grounded dialog response generation.
- RiSAWOZ (Dialog Response Generation | Mandarin Chinese): RiSAWOZ is a Chinese dialog dataset. It can be used to study various dialogue tasks, such as Dialogue State Tracking, Dialogue Context-to-Text Generation, Coreference Resolution, and Unified Generative Ellipsis and Coreference Resolution.
- schema_guided_dialog (Dialog Response Generation | English): The GEM version of this dataset functions as a response generation dataset. The input specifies dialog acts that a model needs to verbalize. The Schema-Guided Dialog dataset is challenging since it comprises multiple domains, from hotel and travel to restaurants, and a wide range of dialog acts. The context of each conversation is provided as well.
- Taskmaster (Dialog Response Generation | English): This is a large task-oriented dialog dataset in which a model has to produce the response. The input contains the context and a structured representation of what the model is supposed to generate. The input is already pre-formatted as a string, turning this into a pure text-to-text problem.
- opusparcus (Paraphrasing | German, English, Finnish, French, Russian, Swedish): Opusparcus is a paraphrase corpus for six European languages: German, English, Finnish, French, Russian, and Swedish. The paraphrases consist of subtitles from movies and TV shows.
- turku_paraphrase_corpus (Paraphrasing | Finnish): This is a Finnish paraphrase corpus which consists of pairs of text passages, where a typical passage is about a sentence long. It can be used to either identify or generate paraphrases.
- FairytaleQA (Question Generation | English): The FairytaleQA dataset is an English-language dataset focusing on narrative comprehension of kindergarten to eighth-grade students. Generated by educational experts based on an evidence-based theoretical framework, FairytaleQA consists of 10,580 explicit and implicit questions derived from 278 children-friendly stories, covering seven types of narrative elements or relations. The dataset supports both the Question Generation and Question Answering tasks.
- squad_v2 (Question Generation | English): SQuAD2.0 is a dataset that tests the ability of a system to not only answer reading comprehension questions, but also abstain when presented with a question that cannot be answered based on the provided paragraph. F1 score is used to evaluate models on the leaderboard. In GEM, we are using this dataset for the question generation task, in which a model should generate SQuAD-like questions from an input text.
- ART (Reasoning | English): Abductive reasoning is inference to the most plausible explanation. For example, if Jenny finds her house in a mess when she returns from work, and remembers that she left a window open, she can hypothesize that a thief broke into her house and caused the mess, as the most plausible explanation. The ART dataset frames this as a generation task: given a pair of observations, a model has to generate a plausible hypothesis that explains them.
- common_gen (Reasoning | English): CommonGen is an English text generation task to explicitly test machines for the ability of generative commonsense reasoning. Given a set of common concepts, the task is to generate a coherent sentence describing an everyday scenario using these concepts. CommonGen is challenging because it inherently requires 1) relational reasoning using background commonsense knowledge, and 2) compositional generalization ability to work on unseen concept combinations. The dataset, constructed through a combination of crowd-sourcing from AMT and existing caption corpora, consists of 30k concept-sets and 50k sentences in total. Note that the CommonGen test set is private and requires submission to the external leaderboard. An illustrative concept set is sketched after this list.
- BiSECT (Simplification | English, German, French, Spanish): This dataset is composed of 1 million complex sentences; the task is to split and simplify them while retaining the full meaning. Compared to other simplification corpora, BiSECT requires more significant edits. BiSECT offers splits in English, German, French, and Spanish.
- cochrane-simplification (Simplification | English): Cochrane is an English dataset for paragraph-level simplification of medical texts. Cochrane is a database of systematic reviews of clinical questions, many of which have summaries in plain English targeting readers without a university education. The dataset comprises about 4,500 such pairs.
- SIMPITIKI (Simplification | Italian): SIMPITIKI is an Italian simplification dataset. Its examples were selected from Italian Wikipedia such that their edit-tracking descriptions contain any of the words "Simplified"/"Simplify"/"Simplification".
- wiki_auto_asset_turk (Simplification | English): WikiAuto is an English simplification dataset that we paired with ASSET and TURK, two very high-quality evaluation datasets, as test sets. The input is an English sentence taken from Wikipedia and the target is a simplified sentence. ASSET and TURK contain the same test examples but have references that are simplified in different ways (splitting sentences vs. rewriting and splitting).
- indonlg (Summarization | Indonesian, Javanese, Sundanese): IndoNLG is a collection of various Indonesian, Javanese, and Sundanese NLG tasks, including summarization, question answering, chit-chat, and three different pairs of machine translation (MT) tasks.
- mlsum (Summarization | German, Spanish): MLSum is a multilingual summarization dataset crawled from different news websites. The GEM version supports the German and Spanish subsets alongside specifically collected challenge sets for COVID-related articles to test out-of-domain generalization.
- OrangeSum (Summarization | French): OrangeSum is a French summarization dataset inspired by XSum. It features two subtasks: abstract generation and title generation. The data was sourced from "Orange Actu" articles published between 2011 and 2020.
- squality (Summarization | English): SQuALITY (Summarization-format QUestion Answering with Long Input Texts, Yes!) is a summarization dataset that is (1) abstractive, (2) long-input (the input documents are short stories between 3,000 and 6,000 words), (3) question-focused (each story is associated with multiple question-summary pairs), (4) multi-reference (each question is paired with 4 summaries), and (5) high-quality (the summaries are crowdsourced from skilled and trained writers).
- wiki_cat_sum (Summarization | English): WikiCatSum is an English summarization dataset in three domains: animals, companies, and films. It provides multiple paragraphs of text paired with a summary of the paragraphs.
- wiki_lingua (Summarization | English, Spanish, Portuguese, French, German, Russian, Italian, Indonesian, Dutch, Arabic, Chinese, Vietnamese, Thai, Japanese, Korean, Hindi, Czech, Turkish): WikiLingua is a large-scale multilingual summarization dataset based on WikiHow, in which how-to article sections are paired with short summaries. The article-summary pairs are aligned across 18 languages, supporting both monolingual and cross-lingual summarization.
- xlsum (Summarization | Amharic, Arabic, Azerbaijani, Bengali, Burmese, Chinese, English, French, Gujarati, Hausa, Hindi, Igbo, Indonesian, Japanese, Rundi, Korean, Kyrgyz, Marathi, Nepali, Oromo, Pashto, Persian, Ghanaian Pidgin English, Portuguese, Punjabi, Russian, Scottish Gaelic, Serbian, Romano-Serbian, Sinhala, Somali, Spanish, Swahili, Tamil, Telugu, Thai, Tigrinya, Turkish, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, Yoruba): XLSum is a highly multilingual summarization dataset supporting 44 languages. The data stems from BBC news articles.
- xsum (Summarization | English): XSum is an English news summarization dataset where the task is to predict the first sentence of an article from the rest of it.
- xwikis (Summarization | German, English, French, Czech): The XWikis Corpus provides datasets with different language pairs and directions for cross-lingual and multi-lingual abstractive document summarisation.
- SciDuet (Text-to-Slide | English): This dataset supports the document-to-slide generation task, where a model has to generate presentation slide content from the text of a document.
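For the data-to-text formats referenced above, a few hand-written illustrations may help. The first sketches the flat key-value meaning representation used by e2e_nlg: the attribute names follow the E2E conventions, but the specific values and the reference sentence below are invented for illustration.

```python
# Illustrative (hand-written) E2E-style example: a flat meaning representation of
# key-value attributes and one possible reference verbalization. The attribute
# names mirror the E2E conventions; the values and the reference are invented.
meaning_representation = (
    "name[The Eagle], eatType[coffee shop], food[French], "
    "priceRange[moderate], area[riverside]"
)
reference = "The Eagle is a moderately priced French coffee shop in the riverside area."
```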
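The second illustrates a web_nlg input: a small set of subject-predicate-object triples that share entities (a connected graph), paired with one possible verbalization. The triples and the predicate names below are hand-written for illustration, not taken from the dataset.

```python
# Illustrative WebNLG-style input: subject-predicate-object triples with shared
# entities, plus one possible target text. Hand-written example, not dataset content.
triples = [
    ("Alan_Bean", "occupation", "Test_pilot"),
    ("Alan_Bean", "was_a_crew_member_of", "Apollo_12"),
]
verbalization = "Alan Bean, a test pilot, was a crew member of Apollo 12."
```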
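Finally, a common_gen instance pairs a small concept set with a sentence that uses all of the concepts; the example below paraphrases the canonical example from the CommonGen paper.

```python
# Illustrative CommonGen-style example: a concept set as input and one acceptable
# output sentence that mentions every concept in an everyday scenario.
concepts = ["dog", "frisbee", "catch", "throw"]
sentence = "A dog leaps to catch a frisbee thrown by its owner."
```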