Skip to content

Commit

Permalink
The original form of the phrase is already included in the output of itertools.product()
Browse files Browse the repository at this point in the history
Also add lemmas with '-' replaced by ' '.
  • Loading branch information
xxyzz committed Oct 23, 2021
1 parent f6b6d50 commit e9d0d0e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
7 changes: 4 additions & 3 deletions data/dump_lemmas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
if '(' in lemma: # 'take (something) into account'
continue

if '/' not in lemma:
keyword_processor.add_keyword(lemma, values)

if ' ' in lemma: # phrase, for example: 'slick back/down'
list_of_inflections_list = []
for word in lemma.split(' '):
Expand All @@ -32,9 +29,13 @@
product(*list_of_inflections_list)):
keyword_processor.add_keyword(phrase, values)
else:
keyword_processor.add_keyword(lemma, values)
for inflection in filter(lambda x: x != lemma and x not in lemma,
chain(*getAllInflections(lemma).values())):
keyword_processor.add_keyword(inflection, values)

if '-' in lemma:
keyword_processor.add_keyword(lemma.replace('-', ' '), values)

with open('lemmas_dump', 'wb') as f:
pickle.dump(keyword_processor, f)
2 changes: 1 addition & 1 deletion tests/LanguageLayer.en.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"count": [
[
9758
9779
]
],
"glosses": [
Expand Down

0 comments on commit e9d0d0e

Please sign in to comment.