From 44d69aba44b6a5b43969dbaa6b06e4ef5f2b4b4f Mon Sep 17 00:00:00 2001 From: Eszti Date: Mon, 18 Nov 2024 20:19:10 +0100 Subject: [PATCH] Prepare dev_random pipeline --- tuw_nlp/sem/hrg/Documentation.md | 16 ++++++++++++--- .../hrg/pipeline/config/merge_dev_random.json | 14 +++++++++++++ .../pipeline/config/pipeline_dev_random.json | 20 +++++++++++++++++++ .../sem/hrg/pipeline/config/random_dev.json | 2 ++ 4 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 tuw_nlp/sem/hrg/pipeline/config/merge_dev_random.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_random.json diff --git a/tuw_nlp/sem/hrg/Documentation.md b/tuw_nlp/sem/hrg/Documentation.md index 1b2badb..76b25d8 100644 --- a/tuw_nlp/sem/hrg/Documentation.md +++ b/tuw_nlp/sem/hrg/Documentation.md @@ -63,7 +63,7 @@ python steps/predict/predict.py -d $DATA_DIR -c pipeline/config/predict_100.json Once all sentences are predicted, we [merge](steps/predict/merge.py) them into one json per model. ```bash - python steps/predict/merge.py -d $DATA_DIR -c pipeline/config/merge_100.json +python steps/predict/merge.py -d $DATA_DIR -c pipeline/config/merge_100.json ``` #### Run the whole predict pipeline on dev @@ -81,10 +81,20 @@ python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_300.jso ### Create a random predictions for comparison -We implement a [random extractor](random/random_extractor.py) that uses the [artefacts](random/train_stat) of the training dataset (distribution of the number of extractions per sentence, and distribution of labels per length of the sentence) and assures that the predicate is a verb. +We implement a [random extractor](steps/random/random_extractor.py) that uses the [artefacts](pipeline/output/artefacts) of the training dataset (distribution of the number of extractions per sentence, and distribution of labels per length of the sentence) and ensures that the predicate is a verb. 
```bash -# TBD +# Extract artefacts +python steps/random/artefacts.py -d $DATA_DIR -c pipeline/config/artefacts_train.json + +# Get random extractions +python steps/random/random_extractor.py -d $DATA_DIR -c pipeline/config/random_dev.json + +# Merge the extractions +python steps/predict/merge.py -d $DATA_DIR -c pipeline/config/merge_dev_random.json + +# Or run as a pipeline +python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_random.json ``` ### Evaluate the predictions diff --git a/tuw_nlp/sem/hrg/pipeline/config/merge_dev_random.json b/tuw_nlp/sem/hrg/pipeline/config/merge_dev_random.json new file mode 100644 index 0000000..fd32f12 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/merge_dev_random.json @@ -0,0 +1,14 @@ +{ + "in_dir": "dev_random", + "k": 10, + "bolinas_chart_filters": + [ + "boa", + "argidx" + ], + "postprocess": + [ + "" + ], + "out_dir": "dev_extractions" +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_random.json b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_random.json new file mode 100644 index 0000000..8f5fbd4 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_random.json @@ -0,0 +1,20 @@ +{ + "steps": + [ + { + "step_name": "artefacts", + "script_name": "artefacts", + "config": "artefacts_train.json" + }, + { + "step_name": "random", + "script_name": "random", + "config": "random_dev.json" + }, + { + "step_name": "merge_random", + "script_name": "merge", + "config": "merge_dev_random.json" + } + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/random_dev.json b/tuw_nlp/sem/hrg/pipeline/config/random_dev.json index 4623ffe..0e39e75 100644 --- a/tuw_nlp/sem/hrg/pipeline/config/random_dev.json +++ b/tuw_nlp/sem/hrg/pipeline/config/random_dev.json @@ -1,5 +1,7 @@ { + "in_dir": "dev_preproc", "out_dir": "dev_random", + "artefact_prefix": "artefacts_train", "models": [ "boa",