forked from pixegami/rag-tutorial-v2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcli_flags.py
116 lines (101 loc) · 4.71 KB
/
cli_flags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import defaults
import split_methods
import model_providers
# CLI flags. `dest` names are spelled out for clarity even though they are normally inferred from the flags themselves; each one is the attribute name under which the parsed value is stored on the resulting namespace object (not on the parser itself).
# IMPORTANT: If you edit a `dest` value here, also update every reference to the corresponding namespace attribute throughout the codebase.
# Split-method flag: short/long spellings plus the keyword options that
# accompany them (presumably unpacked into an argparse `add_argument` call).
split_method_args: list[str] = [
    "--sm",
    "--split-method",
]
split_method_kwargs: dict = dict(
    dest="split_method",
    type=str,
    default=defaults.DEFAULT_SPLIT_METHOD,
    # Only the values listed in split_methods.SPLIT_METHOD_CHOICES are accepted.
    choices=split_methods.SPLIT_METHOD_CHOICES,
    help=(
        "Specifies splitting method. "
        "Use langchain's recursive text splitting ('recursive') "
        "or the experimental semantic text splitting ('semantic'). "
        "The methods are implemented only for pdfs."
    ),
)
# Data-path flag: where the input data lives; falls back to
# defaults.DEFAULT_DATA_PATH when the flag is omitted.
data_path_args: list[str] = [
    "--data",
    "--data-path",
]
data_path_kwargs: dict = dict(
    dest="data_path",
    type=str,
    default=defaults.DEFAULT_DATA_PATH,
    help="Specifies data path.",
)
# Database-path flag: falls back to defaults.DEFAULT_DB_PATH when omitted.
db_path_args: list[str] = [
    "--db",
    "--db-path",
]
db_path_kwargs: dict = dict(
    dest="db_path",
    type=str,
    default=defaults.DEFAULT_DB_PATH,
    help="Specifies database path.",
)
# Reset flag: a boolean switch that takes no value on the command line.
reset_db_args: list[str] = [
    "--reset",
    "--reset-db",
]
reset_db_kwargs: dict = dict(
    dest="reset_db",
    action="store_true",
    # Redundant with "store_true" (which already defaults to False); kept so
    # the default is visible at a glance.
    default=False,
    help="Resets the database.",
)
# Query flag: the text of the question to ask. No default is declared here.
query_text_args: list[str] = [
    "-q",
    "--query",
]
query_text_kwargs: dict = dict(
    dest="query_text",
    type=str,
    help="Specifies query text.",
)
# Chunk-size flag for the 'recursive' split method; integer-valued, with
# defaults.DEFAULT_RECURSIVE_CHUNK_SIZE as the fallback.
recursive_chunk_size_args: list[str] = [
    "--rcs",
    "--recursive-chunk-size",
]
recursive_chunk_size_kwargs: dict = dict(
    dest="recursive_chunk_size",
    type=int,
    default=defaults.DEFAULT_RECURSIVE_CHUNK_SIZE,
    help="Specifies chunk size for 'recursive' split method.",
)
# Chunk-overlap flag for the 'recursive' split method; integer-valued, with
# defaults.DEFAULT_RECURSIVE_CHUNK_OVERLAP as the fallback.
recursive_chunk_overlap_args: list[str] = [
    "--rco",
    "--recursive-chunk-overlap",
]
recursive_chunk_overlap_kwargs: dict = dict(
    dest="recursive_chunk_overlap",
    type=int,
    default=defaults.DEFAULT_RECURSIVE_CHUNK_OVERLAP,
    help="Specifies chunk overlap for 'recursive' split method.",
)
# Breakpoint-threshold flag for the 'semantic' split method; falls back to
# defaults.DEFAULT_SEMANTIC_BREAKPOINT_THRESHOLD_AMOUNT when omitted.
semantic_breakpoint_threshold_amount_args: list[str] = [
    "--sbta",
    "--semantic-breakpoint-threshold-amount",
]
semantic_breakpoint_threshold_amount_kwargs: dict = {
    "dest": "semantic_breakpoint_threshold_amount",
    # Parsed as float rather than int: LangChain's SemanticChunker documents
    # `breakpoint_threshold_amount` as a float, and fractional thresholds
    # (e.g. 1.5) would otherwise be rejected on the command line. Whole-number
    # input such as "95" still parses. NOTE(review): assumes this value is
    # forwarded to SemanticChunker -- confirm against the caller.
    "type": float,
    "default": defaults.DEFAULT_SEMANTIC_BREAKPOINT_THRESHOLD_AMOUNT,
    "help": "Specifies breakpoint threshold amount for 'semantic' split method.",
}
# Source-count flag: integer-valued, with defaults.DEFAULT_NUM_SOURCES as the
# fallback.
num_sources_args: list[str] = [
    "-n",
    "--num-sources",
]
num_sources_kwargs: dict = dict(
    dest="num_sources",
    type=int,
    default=defaults.DEFAULT_NUM_SOURCES,
    help=(
        "Specifies how many chunks/sources to take into account "
        "when answering a query."
    ),
)
# Embedding-provider flag: restricted to
# model_providers.EMBEDDING_MODEL_PROVIDER_CHOICES; the help text embeds the
# default so it is shown to the user.
embedding_model_provider_args: list[str] = [
    "--emp",
    "--embedding-model-provider",
]
embedding_model_provider_kwargs: dict = dict(
    dest="embedding_model_provider",
    type=str,
    default=defaults.DEFAULT_EMBEDDING_MODEL_PROVIDER,
    choices=model_providers.EMBEDDING_MODEL_PROVIDER_CHOICES,
    help=(
        "Specifies embedding model provider to use. "
        f"Default is '{defaults.DEFAULT_EMBEDDING_MODEL_PROVIDER}'. "
        "It is recommended to use the same model and model provider "
        "for refreshing the database as well as querying data."
    ),
)
# Embedding-model flag: a free-form string (no `choices` restriction); the
# help text embeds the default so it is shown to the user.
embedding_model_args: list[str] = [
    "--em",
    "--embedding-model",
]
embedding_model_kwargs: dict = dict(
    dest="embedding_model",
    type=str,
    default=defaults.DEFAULT_EMBEDDING_MODEL,
    help=(
        "Specifies embedding model to use in string format. "
        "May be a model name, a path to a local model file, etc. "
        "depending on the model provider and local implementation. "
        f"Default is '{defaults.DEFAULT_EMBEDDING_MODEL}'. "
        "It is recommended to use the same model and model provider "
        "for refreshing the database as well as querying data."
    ),
)
# Language-model-provider flag: restricted to
# model_providers.LANGUAGE_MODEL_PROVIDER_CHOICES; the help text embeds the
# default so it is shown to the user.
language_model_provider_args: list[str] = [
    "--lmp",
    "--language-model-provider",
]
language_model_provider_kwargs: dict = dict(
    dest="language_model_provider",
    type=str,
    default=defaults.DEFAULT_LANGUAGE_MODEL_PROVIDER,
    choices=model_providers.LANGUAGE_MODEL_PROVIDER_CHOICES,
    help=(
        "Specifies language model provider to use. "
        f"Default is '{defaults.DEFAULT_LANGUAGE_MODEL_PROVIDER}'"
    ),
)
# Language-model flag: a free-form string (no `choices` restriction); the
# help text embeds the default so it is shown to the user.
language_model_args: list[str] = [
    "--lm",
    "--language-model",
]
language_model_kwargs: dict = dict(
    dest="language_model",
    type=str,
    default=defaults.DEFAULT_LANGUAGE_MODEL,
    help=(
        "Specifies language model to use in string format. "
        "May be a model name, a path to a local model file, etc. "
        "depending on the model provider and local implementation. "
        f"Default is '{defaults.DEFAULT_LANGUAGE_MODEL}'."
    ),
)