Skip to content

Commit

Permalink
Use nlp.pipe() when creating both Word Wise and X-Ray files
Browse files: browse the repository at this point in the history
  • Loading branch information
xxyzz committed Jul 19, 2021
1 parent 2362dce commit 436896a
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions parse_job.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,13 +18,16 @@ def do_job(data, create_ww=True, create_x=True,
(_, book_fmt, asin, book_path, _, lang) = data
model = lang['spacy'] + prefs['model_size']
install_libs(model, create_ww, create_x)
is_kfx = book_fmt == 'KFX'

if create_ww:
ll_conn, ll_path = create_lang_layer(asin, book_path, book_fmt)
if ll_conn is None:
create_ww = False
if not create_x:
return
else:
lemmas = load_json('data/lemmas.json')

if create_x:
x_ray_conn, x_ray_path = create_x_ray_db(asin, book_path, lang['wiki'])
Expand All @@ -37,22 +40,19 @@ def do_job(data, create_ww=True, create_x=True,
'parser', 'attribute_ruler', 'lemmatizer'])
nlp.enable_pipe("senter")

is_kfx = book_fmt == 'KFX'
if create_ww:
lemmas = load_json('data/lemmas.json')
for (text, start) in parse_book(book_path, is_kfx):
find_lemma(start, text, lemmas, ll_conn, is_kfx)
if create_x:
find_named_entity(start, x_ray, nlp(text), is_kfx)

save_db(ll_conn, ll_path)
else:
for doc, start in nlp.pipe(parse_book(book_path, is_kfx),
as_tuples=True):
find_named_entity(start, x_ray, doc, is_kfx)
if create_ww:
find_lemma(start, doc.text, lemmas, ll_conn, is_kfx)

if create_x:
x_ray.finish(x_ray_path)
elif create_ww:
for text, start in parse_book(book_path, is_kfx):
find_lemma(start, text, lemmas, ll_conn, is_kfx)

if create_ww:
save_db(ll_conn, ll_path)


def parse_book(book_path, is_kfx):
Expand Down

0 comments on commit 436896a

Please sign in to comment.