-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathrun.py
47 lines (43 loc) · 2.1 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import argparse
import pathlib
from exsclaim.pipeline import Pipeline
# Default query, used when --query/-q is not given on the command line.
# Either a path to a Query JSON or a bare query name; a value without the
# .json file extension is resolved to path/to/exsclaim/query/<QUERY>.json
QUERY = "nature-haadf-ag-nanoparticles"
# Default tool selection, used when --tools/-t is not given on the command
# line: the first letter of each tool to be run
TOOLS = "jcf"
# Define the command line interface. Both options default to None so the
# script can tell "not supplied" apart from an explicitly passed value and
# fall back to the QUERY / TOOLS variables in this file.
parser = argparse.ArgumentParser(description='Run the EXSCLAIM! Pipeline')
parser.add_argument(
    '--query', '-q', type=str, default=None,
    help=('Name of EXSCLAIM Query JSON, defined here: '
          'https://github.com/MaterialEyes/exsclaim/wiki/JSON-Schema#query-json-'
          '. Samples are in the query folder. If a file '
          'extension is included (.json) the variable will '
          'be treated as a full path, otherwise the full path '
          'will be assumed as /path/to/exsclaim/query/<query>.json'
          '. If no value is supplied, QUERY variable in '
          'run.py will be used.'))
# NOTE: the F entry previously used the invalid escape '\F', which put a
# literal backslash in the help output; it now uses '\t' like J and C.
parser.add_argument(
    '--tools', '-t', type=str, default=None,
    help=('String containing the first letter of each tool '
          'to be run on input query.\nJ\tJournalScraper\nC\t'
          'CaptionDistributor\nF\tFigureSeparator. Order and '
          'case insensitive. If no value is supplied, TOOLS '
          'variable in run.py will be used.'))
args = parser.parse_args()
# Values supplied on the command line override the QUERY / TOOLS variables
if args.query is not None:
    QUERY = args.query
if args.tools is not None:
    TOOLS = args.tools
# Format args to enter into Pipeline
# Tool letters are matched case-insensitively, so normalise once here
TOOLS = TOOLS.lower()
# Only a value that ends with the .json extension is treated as a full
# path. A bare query name is resolved to the "query" folder that sits next
# to this file. (A plain substring test would misclassify names that merely
# contain ".json" somewhere other than the end.)
if not QUERY.endswith(".json"):
    current_file = pathlib.Path(__file__).resolve(strict=True)
    queries = current_file.parent / "query"
    QUERY = queries / (QUERY + ".json")
# One flag per tool: set when the tool's first letter appears in TOOLS
f = "f" in TOOLS
j = "j" in TOOLS
c = "c" in TOOLS
# Build the pipeline from the resolved query, then run only the tools
# whose flags were selected
pipeline = Pipeline(QUERY)
pipeline.run(
    journal_scraper=j,
    caption_distributor=c,
    figure_separator=f,
)