-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from tobias-watzel/init_utils
Init commit
- Loading branch information
Showing
52 changed files
with
3,828 additions
and
232 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,4 @@ | ||
# .coveragerc to control coverage.py | ||
[run] | ||
branch = True | ||
source = osc_data_extractor | ||
# omit = bad_file.py | ||
|
||
[paths] | ||
source = | ||
src/ | ||
*/site-packages/ | ||
|
||
[report] | ||
# Regexes for lines to exclude from consideration | ||
exclude_lines = | ||
# Have to re-enable the standard pragma | ||
pragma: no cover | ||
|
||
# Don't complain about missing debug-only code: | ||
def __repr__ | ||
if self\.debug | ||
|
||
# Don't complain if tests don't hit defensive assertion code: | ||
raise AssertionError | ||
raise NotImplementedError | ||
|
||
# Don't complain if non-runnable code isn't run: | ||
if 0: | ||
if __name__ == .__main__.: | ||
omit = osc_extraction_utils/tests*, osc_extraction_utils/utils.py, osc_extraction_utils/s3_communication.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
kpi_id,question,sectors,add_year,kpi_category,, | ||
1,In which year was the annual report or the sustainability report published?,"OG, CM, CU",FALSE,TEXT,, | ||
2,What is the total volume of proven and probable hydrocarbons reserves?,OG,TRUE,"TEXT, TABLE",, | ||
6,What is the total amount of direct greenhouse gases emissions referred to as scope 1 emissions?,"CU, OG",TRUE,"TEXT, TABLE",, | ||
7,What is the total amount of energy indirect greenhouse gases emissions referred to as scope 2 emissions?,"CU, OG",TRUE,"TEXT, TABLE",, | ||
9,What is the base year for carbon reduction commitment?,"OG, CM, CU",FALSE,"TEXT, TABLE",, | ||
11,What is the target year for climate commitment?,"OG, CM, CU",FALSE,TEXT,, | ||
12,What is the target carbon reduction in percentage?,"OG, CM, CU",TRUE,TEXT,, |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
kpi_id,question,sectors,add_year,kpi_category,, | ||
1,In which year was the annual report or the sustainability report published?,"OG, CM, CU",FALSE,TEXT,, | ||
2,What is the total volume of proven and probable hydrocarbons reserves?,OG,TRUE,"TEXT, TABLE",, | ||
6,What is the total amount of direct greenhouse gases emissions referred to as scope 1 emissions?,"CU, OG",TRUE,"TEXT, TABLE",, | ||
7,What is the total amount of energy indirect greenhouse gases emissions referred to as scope 2 emissions?,"CU, OG",TRUE,"TEXT, TABLE",, | ||
9,What is the base year for carbon reduction commitment?,"OG, CM, CU",FALSE,"TEXT, TABLE",, | ||
11,What is the target year for climate commitment?,"OG, CM, CU",FALSE,TEXT,, | ||
12,What is the target carbon reduction in percentage?,"OG, CM, CU",TRUE,TEXT,, |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
--- | ||
general: | ||
ext_ip: 172.30.15.68 | ||
ext_port: 4000 | ||
infer_ip: 172.30.88.213 | ||
infer_port: 6000 | ||
rb_ip: 172.30.224.91 | ||
rb_port: 8000 | ||
delete_interim_files: true | ||
data_export: | ||
enable_db_export: false | ||
db_dialect: oracle | ||
db_sql_driver: cx_oracle | ||
db_host: "" | ||
db_port: "1521" | ||
db_user: "" | ||
db_password: "" | ||
db_post_command: "" | ||
extraction: | ||
min_paragraph_length: 20 | ||
seed: 42 | ||
annotation_folder: null | ||
skip_extracted_files: true | ||
use_extractions: true | ||
store_extractions: true | ||
curation: | ||
retrieve_paragraph: false | ||
neg_pos_ratio: 1 | ||
columns_to_read: | ||
- company | ||
- source_file | ||
- source_page | ||
- kpi_id | ||
- year | ||
- answer | ||
- data_type | ||
- relevant_paragraphs | ||
company_to_exclude: [] | ||
create_neg_samples: true | ||
min_length_neg_sample: 50 | ||
seed: 41 | ||
train_relevance: | ||
base_model: roberta-base | ||
input_model_name: null | ||
output_model_name: TEST_1 | ||
train: true | ||
seed: 42 | ||
processor: | ||
proc_max_seq_len: 512 | ||
proc_dev_split: 0.2 | ||
proc_label_list: | ||
- "0" | ||
- "1" | ||
proc_label_column_name: label | ||
proc_delimiter: "," | ||
proc_metric: acc | ||
model: | ||
model_layer_dims: | ||
- 768 | ||
- 2 | ||
model_lm_output_types: | ||
- per_sequence | ||
training: | ||
run_hyp_tuning: false | ||
use_amp: true | ||
distributed: false | ||
learning_rate: 0.00001 | ||
n_epochs: 10 | ||
evaluate_every: 100 | ||
dropout: 0.2 | ||
batch_size: 4 | ||
grad_acc_steps: 1 | ||
run_cv: false | ||
xval_folds: 5 | ||
max_processes: 128 | ||
infer_relevance: | ||
skip_processed_files: true | ||
batch_size: 16 | ||
gpu: true | ||
num_processes: null | ||
disable_tqdm: true | ||
kpi_questions: [] | ||
sectors: | ||
- OG | ||
- CM | ||
- CU | ||
return_class_probs: false | ||
train_kpi: | ||
input_model_name: null | ||
output_model_name: TEST_1 | ||
base_model: a-ware/roberta-large-squadv2 | ||
train: true | ||
seed: 42 | ||
curation: | ||
val_ratio: 0 | ||
seed: 42 | ||
find_new_answerable: true | ||
create_unanswerable: true | ||
data: | ||
perform_splitting: true | ||
dev_split: 0.2 | ||
mlflow: | ||
track_experiment: false | ||
url: http://localhost:5000 | ||
processor: | ||
max_seq_len: 384 | ||
label_list: | ||
- start_token | ||
- end_token | ||
metric: squad | ||
model: | ||
model_layer_dims: | ||
- 768 | ||
- 2 | ||
model_lm_output_types: | ||
- per_token | ||
training: | ||
run_hyp_tuning: false | ||
use_amp: true | ||
distributed: false | ||
learning_rate: 0.00001 | ||
n_epochs: 10 | ||
evaluate_every: 100 | ||
dropout: 0.3 | ||
batch_size: 4 | ||
grad_acc_steps: 1 | ||
run_cv: false | ||
xval_folds: 5 | ||
metric: f1 | ||
max_processes: 1 | ||
infer_kpi: | ||
skip_processed_files: false | ||
top_k: 4 | ||
batch_size: 16 | ||
gpu: true | ||
num_processes: null | ||
no_ans_boost: -15 | ||
rule_based: | ||
verbosity: 2 | ||
use_docker: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
--- | ||
main_bucket: | ||
s3_endpoint: LANDING_AWS_ENDPOINT | ||
s3_access_key: LANDING_AWS_ACCESS_KEY | ||
s3_secret_key: LANDING_AWS_SECRET_KEY | ||
s3_bucket_name: LANDING_AWS_BUCKET_NAME | ||
interim_bucket: | ||
s3_endpoint: INTERIM_AWS_ENDPOINT | ||
s3_access_key: INTERIM_AWS_ACCESS_KEY | ||
s3_secret_key: INTERIM_AWS_SECRET_KEY | ||
s3_bucket_name: INTERIM_AWS_BUCKET_NAME | ||
prefix: corporate_data_extraction_projects |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Welcome to MkDocs | ||
|
||
For full documentation visit [mkdocs.org](https://www.mkdocs.org). | ||
|
||
## Commands | ||
|
||
- `mkdocs new [dir-name]` - Create a new project. | ||
- `mkdocs serve` - Start the live-reloading docs server. | ||
- `mkdocs build` - Build the documentation site. | ||
- `mkdocs -h` - Print help message and exit. | ||
|
||
## Project layout | ||
|
||
    mkdocs.yml    # The configuration file. | ||
    docs/ | ||
        index.md  # The documentation homepage. | ||
        ...       # Other markdown pages, images and other files. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
--- | ||
site_name: My Docs |
Empty file.
Oops, something went wrong.