From eaac7eadc2686632cce6c997a8cb5a6687ba815c Mon Sep 17 00:00:00 2001 From: souvik ghosh Date: Wed, 6 Sep 2023 13:55:17 +0200 Subject: [PATCH] ref: add zwo as alternative for 2 --- Duckling/Numeral/DE/Corpus.hs | 15 +++++++++++++++ Duckling/Numeral/DE/NumParser.hs | 5 ++++- Duckling/Numeral/DE/Rules.hs | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Duckling/Numeral/DE/Corpus.hs b/Duckling/Numeral/DE/Corpus.hs index 69b03132a..0e6814c9d 100644 --- a/Duckling/Numeral/DE/Corpus.hs +++ b/Duckling/Numeral/DE/Corpus.hs @@ -36,6 +36,11 @@ allExamples = concat , "Eine" , "einen" ] + , examples (NumeralValue 2) + [ "2" + , "Zwei" + , "Zwo" + ] , examples (NumeralValue 3) [ "3" , "Drei" @@ -77,6 +82,16 @@ allExamples = concat [ "18" , "achtzehn" ] + , examples (NumeralValue 82) + [ "82" + , "zwoundachtzig" + , "zweiundachtzig" + ] + , examples (NumeralValue 182) + [ "182" + , "einhundertzwoundachtzig" + , "einhundertzweiundachtzig" + ] , examples (NumeralValue 200) [ "200" , "zweihundert" diff --git a/Duckling/Numeral/DE/NumParser.hs b/Duckling/Numeral/DE/NumParser.hs index 4e3762d42..1f09b4d81 100644 --- a/Duckling/Numeral/DE/NumParser.hs +++ b/Duckling/Numeral/DE/NumParser.hs @@ -103,6 +103,9 @@ two = , times10 = [assign 20 "zwanzig"] } +twoAlternative :: NumItem +twoAlternative = defaultNumItem 2 "zwo" + three :: NumItem three = (defaultNumItem 3 "drei") @@ -139,7 +142,7 @@ nine :: NumItem nine = defaultNumItem 9 "neun" digitLexicon :: [NumItem] -digitLexicon = [one, two, three, four, five, six, seven, eight, nine] +digitLexicon = [one, twoAlternative, two, three, four, five, six, seven, eight, nine] from1to9 :: NumParser from1to9 = foldr ((<|>) . 
base) empty digitLexicon diff --git a/Duckling/Numeral/DE/Rules.hs b/Duckling/Numeral/DE/Rules.hs index 79553595d..02c90959f 100644 --- a/Duckling/Numeral/DE/Rules.hs +++ b/Duckling/Numeral/DE/Rules.hs @@ -218,7 +218,7 @@ ruleIntegerWithThousandsSeparator = Rule ruleAllNumeralWords :: Rule ruleAllNumeralWords = Rule { name = "simple and complex numerals written as one word" - , pattern = [regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"] + , pattern = [regex "(ein|zwei|zwo|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|zwo|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch matches) : _) -> (parseNumeral $ concat $ Text.unpack . Text.toLower <$> matches)