diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index bf00b84..ba50f5e 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -36,7 +36,7 @@ jobs: echo "password: ${{ secrets.ENA_PASSWORD }}" >> .secrets.yml - name: Test submission in --draft mode run: | - ena-upload-cli --action add --draft --dev --center ${{ secrets.ENA_CENTER }} --data example_data/ENA_TEST1.R1.fastq.gz example_data/ENA_TEST2.R1.fastq.gz example_data/ENA_TEST2.R2.fastq.gz --checklist ERC000033 --secret .secret.yml --xlsx example_tables/ENA_excel_example_ERC000033.xlsx + ena-upload-cli --action add --draft --dev --center TEST --data example_data/ENA_TEST1.R1.fastq.gz example_data/ENA_TEST2.R1.fastq.gz example_data/ENA_TEST2.R2.fastq.gz --checklist ERC000033 --secret .secret.yml --xlsx example_tables/ENA_excel_example_ERC000033.xlsx - name: Run Python to get temp directory run: | echo "TEMP_DIR=$(python -c 'import tempfile; print(tempfile.gettempdir())')" >> $GITHUB_ENV diff --git a/.gitignore b/.gitignore index 4e6ddc8..a8da505 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ build/ ena_upload_cli.egg-info/ __pycache__/ +tests/ena_upload/ +.idea/ diff --git a/README.md b/README.md index dd7722b..cf4458f 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,15 @@ The command line tool will automatically fetch the correct scientific name based | sample_alias_4 | sample_title_2 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_1 | 2020-10-11 | Argentina | | sample_alias_5 | sample_title_3 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_2 | 2008-01-24 | Belgium | +#### Custom attributes + +Additional custom attributes (i.e. 
attributes not specified in the ERC checklist) can be added to the sample table by adding columns whose headers are named like `sample_attribute[attribute_name]`; for example, `sample_attribute[treatment]` or `sample_attribute[age]`. + +| alias | ... | sample_attribute[treatment] | sample_attribute[age] | +|----------------|----------------|---------------------|------------------------| +| sample_alias_4 | ... | treated | 2 days | +| sample_alias_5 | ... | untreated | 2 days | + #### Viral submissions If you want to submit viral samples you can use the [ENA virus pathogen](https://www.ebi.ac.uk/ena/browser/view/ERC000033) checklist by adding `ERC000033` to the checklist parameter. Check out our [viral example command](#test-the-tool) as demonstration. Please use the [ENA virus pathogen](https://github.com/ELIXIR-Belgium/ENA-metadata-templates/tree/main/templates/ERC000033) checklist in our template repo to know what is allowed/possible in the `Controlled vocabulary`fields. @@ -116,6 +125,9 @@ If you want to submit viral samples you can use the [ENA virus pathogen](https:/ Please check out the [template](https://github.com/ELIXIR-Belgium/ENA-metadata-templates) of your checklist to discover which attributes are mandatory for the study, experiment and run ENA object. +#### Study and Experiment custom attributes + +As with samples, additional custom attributes can be added to the experiment and study tables by adding columns whose headers are named like `experiment_attribute[attribute_name]` and `study_attribute[attribute_name]`, respectively. 
### Dev instance @@ -231,7 +243,7 @@ By default the updated tables after submission will have the action `added` in t * **Release submission** ``` - ena-upload-cli --action release --center'your_center_name' --study example_tables/ENA_template_studies_release.tsv --dev --secret .secret.yml + ena-upload-cli --action release --center 'your_center_name' --study example_tables/ENA_template_studies_release.tsv --dev --secret .secret.yml ``` > **Note for Windows users:** Windows, by default, does not support wildcard expansion in command-line arguments. diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index 5b7dbe1..ef11f0c 100644 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -214,6 +214,16 @@ def generate_stream(schema, targets, Template, center, tool): :return: stream ''' + # find all columns in targets which column header matches the pattern attribute[(.*)], extract the group + # and return a dict[header] = group + # eg for header run_attribute[sex] => {'run_attribute[sex]': 'sex'} + pattern = re.compile(rf"{schema}_attribute\[(.*)\]") + extra_attributes = {} + for column in targets.columns: + match = re.match(pattern, column) + if match: + extra_attributes[column] = match.group(1) + if schema == 'run': # These attributes are required for rendering # the run xml templates @@ -221,6 +231,11 @@ def generate_stream(schema, targets, Template, center, tool): if 'file_format' in targets: targets.rename(columns={'file_format': 'file_type'}, inplace=True) file_attrib = ['file_name', 'file_type', 'file_checksum'] + if 'read_type' in targets: + file_attrib.append('read_type') + if 'read_label' in targets: + file_attrib.append('read_label') + other_attrib = ['alias', 'experiment_alias'] # Create groups with alias as index run_groups = targets[other_attrib].groupby('alias')['experiment_alias'].first().to_dict() @@ -230,11 +245,14 @@ def generate_stream(schema, targets, Template, center, tool): stream = Template.generate(run_groups=run_groups, 
file_groups=file_groups, center=center, + extra_attributes=extra_attributes, tool_name=tool['tool_name'], tool_version=tool['tool_version']) else: stream = Template.generate( - df=targets, center=center, tool_name=tool['tool_name'], tool_version=tool['tool_version']) + df=targets, center=center, extra_attributes=extra_attributes, + tool_name=tool['tool_name'], tool_version=tool['tool_version'] + ) return stream @@ -982,7 +1000,7 @@ def main(): if pd.notna(row['scientific_name']) and pd.isna(row['taxon_id']): # retrieve taxon id using scientific name taxonID = get_taxon_id(row['scientific_name']) - df.loc[index, 'taxon_id'] = taxonID + df.loc[index, 'taxon_id'] = int(taxonID) elif pd.notna(row['taxon_id']) and pd.isna(row['scientific_name']): # retrieve scientific name using taxon id scientificName = get_scientific_name(row['taxon_id']) diff --git a/ena_upload/templates/ENA_template_PLATFORM.xml b/ena_upload/templates/ENA_template_PLATFORM.xml index 8ba2974..5e14ed0 100644 --- a/ena_upload/templates/ENA_template_PLATFORM.xml +++ b/ena_upload/templates/ENA_template_PLATFORM.xml @@ -26,6 +26,7 @@ Illumina MiSeq Illumina MiniSeq Illumina NovaSeq X + Illumina NovaSeq X Plus Illumina NovaSeq 6000 NextSeq 500 NextSeq 550 diff --git a/ena_upload/templates/ENA_template_READ_TYPE.xml b/ena_upload/templates/ENA_template_READ_TYPE.xml new file mode 100644 index 0000000..ba52ebc --- /dev/null +++ b/ena_upload/templates/ENA_template_READ_TYPE.xml @@ -0,0 +1,9 @@ + +single +paired +cell_barcode +umi_barcode +feature_barcode +sample_barcode +spatial_barcode + \ No newline at end of file diff --git a/ena_upload/templates/ENA_template_experiments.xml b/ena_upload/templates/ENA_template_experiments.xml index 71254e6..3e146e0 100755 --- a/ena_upload/templates/ENA_template_experiments.xml +++ b/ena_upload/templates/ENA_template_experiments.xml @@ -17,62 +17,70 @@ def mandatorytest(row, column, index): xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:noNamespaceSchemaLocation="ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_6/SRA.experiment.xsd"> - - - ${row.title} + + + ${row.title} + + + + + + + ${row.design_description} - - + + ${row.spot_descriptor} - - - ${row.design_description} + + + + + + ${row.library_name} - - ${row.spot_descriptor} + + - - + + - - - ${row.library_name} - - - - - - - - - - - - - - - - - - ${row.library_construction_protocol} - - - - - - - + + + + + + + + + + + ${row.library_construction_protocol} + + + + + + + + + + + + + ${tag} + ${row[header]} + - - - SUBMISSION_TOOL - ${tool_name} - - - SUBMISSION_TOOL_VERSION - ${tool_version} - - - + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + diff --git a/ena_upload/templates/ENA_template_runs.xml b/ena_upload/templates/ENA_template_runs.xml index 15feae7..3f8d081 100644 --- a/ena_upload/templates/ENA_template_runs.xml +++ b/ena_upload/templates/ENA_template_runs.xml @@ -2,6 +2,9 @@ - + + + + + + ${rlabel.strip()} + + + + + + + + + + + + + diff --git a/ena_upload/templates/ENA_template_samples_ERC000011.xml b/ena_upload/templates/ENA_template_samples_ERC000011.xml index 0eba21f..4731e70 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000011.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000011.xml @@ -87,12 +87,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (region and locality) @@ -123,6 +117,12 @@ def mandatorytest(row, column, index): ${row['sex']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + lab_host @@ -213,6 +213,14 @@ def mandatorytest(row, column, index): ${row['strain']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000012.xml b/ena_upload/templates/ENA_template_samples_ERC000012.xml index 
6d8f3d4..9512c97 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000012.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000012.xml @@ -179,12 +179,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -262,6 +256,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -516,6 +516,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000013.xml b/ena_upload/templates/ENA_template_samples_ERC000013.xml index a52c525..9ca5147 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000013.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000013.xml @@ -203,12 +203,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -285,6 +279,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -645,6 +645,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000014.xml b/ena_upload/templates/ENA_template_samples_ERC000014.xml index a058565..75668c8 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000014.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000014.xml @@ -203,12 +203,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location 
(country and/or sea)']} - - geographic location (latitude) @@ -273,6 +267,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + nose/mouth/teeth/throat disorder @@ -667,6 +667,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000015.xml b/ena_upload/templates/ENA_template_samples_ERC000015.xml index 68919a3..83f5348 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000015.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000015.xml @@ -191,12 +191,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -261,6 +255,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + gastrointestinal tract disorder @@ -564,6 +564,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000016.xml b/ena_upload/templates/ENA_template_samples_ERC000016.xml index d8979e9..b276c2c 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000016.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000016.xml @@ -197,12 +197,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -267,6 +261,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + 
nose/mouth/teeth/throat disorder @@ -565,6 +565,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000017.xml b/ena_upload/templates/ENA_template_samples_ERC000017.xml index b9c1a8e..96a13cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000017.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000017.xml @@ -191,12 +191,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -261,6 +255,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + dermatology disorder @@ -565,6 +565,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000018.xml b/ena_upload/templates/ENA_template_samples_ERC000018.xml index 6fa9752..c8f99dc 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000018.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000018.xml @@ -191,12 +191,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -261,6 +255,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + gynecological disorder @@ -613,6 +613,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000019.xml 
b/ena_upload/templates/ENA_template_samples_ERC000019.xml index 5f1a08e..48595bb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000019.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000019.xml @@ -179,12 +179,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -256,6 +250,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -768,6 +768,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000020.xml b/ena_upload/templates/ENA_template_samples_ERC000020.xml index 68e6842..3823752 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000020.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000020.xml @@ -203,12 +203,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -297,6 +291,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -799,6 +799,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000021.xml b/ena_upload/templates/ENA_template_samples_ERC000021.xml index 89cd672..4e758fe 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000021.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000021.xml @@ -198,12 +198,6 @@ def mandatorytest(row, column, index): m - - - 
geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -275,6 +269,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -806,6 +806,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000022.xml b/ena_upload/templates/ENA_template_samples_ERC000022.xml index 1618ac6..8ba9520 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000022.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000022.xml @@ -216,12 +216,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -317,6 +311,12 @@ def mandatorytest(row, column, index): ${row['extreme_unusual_properties/Al saturation method']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -708,6 +708,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000023.xml b/ena_upload/templates/ENA_template_samples_ERC000023.xml index ed8f8b0..a018865 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000023.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000023.xml @@ -179,12 +179,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -249,6 +243,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic 
location (country and/or sea)']} + + biochemical oxygen demand @@ -608,6 +608,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000024.xml b/ena_upload/templates/ENA_template_samples_ERC000024.xml index 13219ee..5b8e71d 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000024.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000024.xml @@ -200,12 +200,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -277,6 +271,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -913,6 +913,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000025.xml b/ena_upload/templates/ENA_template_samples_ERC000025.xml index 8f8ebd1..2ce7413 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000025.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000025.xml @@ -186,12 +186,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -263,6 +257,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -636,6 +636,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git 
a/ena_upload/templates/ENA_template_samples_ERC000027.xml b/ena_upload/templates/ENA_template_samples_ERC000027.xml index adbff3b..7cca27b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000027.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000027.xml @@ -219,12 +219,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - broad-scale environmental context @@ -300,6 +294,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + specific host @@ -910,6 +910,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000028.xml b/ena_upload/templates/ENA_template_samples_ERC000028.xml index dad6a27..9ab9ddf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000028.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000028.xml @@ -57,12 +57,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (region and locality) @@ -87,6 +81,12 @@ def mandatorytest(row, column, index): ${row['mating_type']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host health state @@ -153,6 +153,14 @@ def mandatorytest(row, column, index): ${row['strain']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000029.xml b/ena_upload/templates/ENA_template_samples_ERC000029.xml index 990525e..0013e1b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000029.xml +++ 
b/ena_upload/templates/ENA_template_samples_ERC000029.xml @@ -82,12 +82,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -146,6 +140,12 @@ def mandatorytest(row, column, index): ${row['pathotype']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -316,6 +316,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000030.xml b/ena_upload/templates/ENA_template_samples_ERC000030.xml index 63f63cd..3a394cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000030.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000030.xml @@ -141,12 +141,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - broad-scale environmental context @@ -224,6 +218,12 @@ def mandatorytest(row, column, index): mg Chl/m3 + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + Citation @@ -243,6 +243,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000031.xml b/ena_upload/templates/ENA_template_samples_ERC000031.xml index 7a1bc45..67496f2 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000031.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000031.xml @@ -172,12 +172,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -983,6 +977,12 @@ def mandatorytest(row, column, index): year + + + geographic location (country and/or sea) + 
${row['geographic location (country and/or sea)']} + + host disease status @@ -1423,6 +1423,14 @@ def mandatorytest(row, column, index): ${row['aerospace structure']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000032.xml b/ena_upload/templates/ENA_template_samples_ERC000032.xml index 3d667a7..5e87477 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000032.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000032.xml @@ -177,12 +177,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -216,6 +210,12 @@ def mandatorytest(row, column, index): ${row['sample capture status']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease outcome @@ -391,6 +391,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000033.xml b/ena_upload/templates/ENA_template_samples_ERC000033.xml index 1f2e3c1..bed2ce4 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000033.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000033.xml @@ -81,12 +81,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -120,6 +114,12 @@ def mandatorytest(row, column, index): ${row['sample capture status']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease outcome @@ -253,6 +253,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + 
SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000034.xml b/ena_upload/templates/ENA_template_samples_ERC000034.xml index 2ed7797..6810291 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000034.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000034.xml @@ -45,12 +45,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - sex @@ -75,6 +69,12 @@ def mandatorytest(row, column, index): ${row['diagnosis']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + strain @@ -112,6 +112,14 @@ def mandatorytest(row, column, index): ${row['Further Details']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000035.xml b/ena_upload/templates/ENA_template_samples_ERC000035.xml index cab5871..78ec6f1 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000035.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000035.xml @@ -69,12 +69,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - sampling time point @@ -99,6 +93,12 @@ def mandatorytest(row, column, index): ${row['age']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + phenotype @@ -249,6 +249,14 @@ def mandatorytest(row, column, index): ${row['growth condition']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000036.xml b/ena_upload/templates/ENA_template_samples_ERC000036.xml index b0bb3af..e664dcd 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000036.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000036.xml @@ -95,12 +95,6 @@ def mandatorytest(row, 
column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -140,6 +134,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + receipt date @@ -183,6 +183,14 @@ def mandatorytest(row, column, index): ${row['nucleic acid amplification']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000037.xml b/ena_upload/templates/ENA_template_samples_ERC000037.xml index a809bbd..c0f1b25 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000037.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000037.xml @@ -222,12 +222,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -328,6 +322,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + climate environment @@ -636,6 +636,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000038.xml b/ena_upload/templates/ENA_template_samples_ERC000038.xml index 0768468..7dde69e 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000038.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000038.xml @@ -118,12 +118,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - broad-scale environmental context @@ -199,6 +193,12 @@ def mandatorytest(row, column, index): mg + + + geographic 
location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + treatment agent @@ -218,6 +218,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000039.xml b/ena_upload/templates/ENA_template_samples_ERC000039.xml index fcba7cf..a6622cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000039.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000039.xml @@ -81,12 +81,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -114,6 +108,12 @@ def mandatorytest(row, column, index): year + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease outcome @@ -205,6 +205,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000040.xml b/ena_upload/templates/ENA_template_samples_ERC000040.xml index 8c3356e..e6b3f0f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000040.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000040.xml @@ -90,12 +90,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -140,6 +134,12 @@ def mandatorytest(row, column, index): ${row['environmental_sample']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + salinity @@ -166,6 +166,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000041.xml 
b/ena_upload/templates/ENA_template_samples_ERC000041.xml index f8e3fea..149c363 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000041.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000041.xml @@ -96,12 +96,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (region and locality) @@ -127,6 +121,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + receipt date @@ -175,6 +175,14 @@ def mandatorytest(row, column, index): ${row['protocol']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000043.xml b/ena_upload/templates/ENA_template_samples_ERC000043.xml index e056edf..c7b12de 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000043.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000043.xml @@ -64,12 +64,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -104,6 +98,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + light intensity @@ -161,6 +161,14 @@ def mandatorytest(row, column, index): ${row['growth condition']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000044.xml b/ena_upload/templates/ENA_template_samples_ERC000044.xml index 5310c50..572fb38 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000044.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000044.xml @@ -69,12 +69,6 @@ def mandatorytest(row, column, 
index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (region and locality) @@ -88,6 +82,12 @@ def mandatorytest(row, column, index): year + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -136,6 +136,14 @@ def mandatorytest(row, column, index): ${row['isolation source host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000045.xml b/ena_upload/templates/ENA_template_samples_ERC000045.xml index 0e8a8df..93fc3a5 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000045.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000045.xml @@ -51,18 +51,18 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (region and locality) ${row['geographic location (region and locality)']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + receipt date @@ -81,6 +81,14 @@ def mandatorytest(row, column, index): ${row['serotype']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000047.xml b/ena_upload/templates/ENA_template_samples_ERC000047.xml index 360ffe0..13160c9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000047.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000047.xml @@ -223,12 +223,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -287,6 +281,12 @@ def mandatorytest(row, column, index): ${row['size fraction selected']} + + + geographic location (country and/or sea) + ${row['geographic 
location (country and/or sea)']} + + source material identifiers @@ -384,6 +384,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000048.xml b/ena_upload/templates/ENA_template_samples_ERC000048.xml index 94f8141..82b5a92 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000048.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000048.xml @@ -198,12 +198,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -262,6 +256,12 @@ def mandatorytest(row, column, index): ${row['size fraction selected']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + source material identifiers @@ -389,6 +389,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000049.xml b/ena_upload/templates/ENA_template_samples_ERC000049.xml index f058963..b5ef07a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000049.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000049.xml @@ -246,12 +246,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -310,6 +304,12 @@ def mandatorytest(row, column, index): ${row['size fraction selected']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host scientific name @@ -461,6 +461,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000050.xml b/ena_upload/templates/ENA_template_samples_ERC000050.xml index 
ddc7d39..bab10f0 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000050.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000050.xml @@ -211,12 +211,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -269,6 +263,12 @@ def mandatorytest(row, column, index): ${row['size fraction selected']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + source material identifiers @@ -354,6 +354,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000051.xml b/ena_upload/templates/ENA_template_samples_ERC000051.xml index c67ad21..d9b2741 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000051.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000051.xml @@ -69,12 +69,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - engrafted tumor sample passage @@ -93,6 +87,12 @@ def mandatorytest(row, column, index): ${row['patient tumor site of collection']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + engraftment host strain name @@ -129,6 +129,14 @@ def mandatorytest(row, column, index): ${row['patient sex']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000052.xml b/ena_upload/templates/ENA_template_samples_ERC000052.xml index 20f0111..6745c95 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000052.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000052.xml @@ -76,12 +76,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - 
${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -116,6 +110,12 @@ def mandatorytest(row, column, index): years + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -248,6 +248,14 @@ def mandatorytest(row, column, index): ${row['adapters']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000053.xml b/ena_upload/templates/ENA_template_samples_ERC000053.xml index 963d769..5e78e73 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000053.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000053.xml @@ -145,12 +145,6 @@ def mandatorytest(row, column, index): ${row['collection date']} - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -234,6 +228,12 @@ def mandatorytest(row, column, index): ${row['sex']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + collecting institution @@ -295,6 +295,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000055.xml b/ena_upload/templates/ENA_template_samples_ERC000055.xml index b008d16..36d8470 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000055.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000055.xml @@ -340,12 +340,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -439,6 +433,12 @@ def mandatorytest(row, column, index): ${row['plant structure']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -899,7 +899,6 @@ def mandatorytest(row, column, 
index): library preparation kit ${row['library preparation kit']} - % @@ -1036,6 +1035,12 @@ def mandatorytest(row, column, index): ${row['food product type']} + + + plant product + ${row['plant product']} + + relative location of sample @@ -1121,6 +1126,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000056.xml b/ena_upload/templates/ENA_template_samples_ERC000056.xml index bc6dc8e..91cadb1 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000056.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000056.xml @@ -475,7 +475,7 @@ def mandatorytest(row, column, index): ${row['assembly name']} - + collection date ${row['collection date']} @@ -488,12 +488,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -667,6 +661,12 @@ def mandatorytest(row, column, index): ${row['sequencing kit']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -1281,7 +1281,6 @@ def mandatorytest(row, column, index): library preparation kit ${row['library preparation kit']} - % @@ -1782,6 +1781,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000057.xml b/ena_upload/templates/ENA_template_samples_ERC000057.xml index 6eb68af..d6b397a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000057.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000057.xml @@ -190,7 +190,7 @@ def mandatorytest(row, column, index): ${row['16S recovery software']} - + collection date ${row['collection date']} @@ -203,12 +203,6 @@ def mandatorytest(row, column, index): m - - - geographic 
location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -273,6 +267,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -485,6 +485,12 @@ def mandatorytest(row, column, index): ${row['symbiotic host organism life cycle type']} + + + host dependence + ${row['host dependence']} + + host specificity @@ -732,6 +738,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000058.xml b/ena_upload/templates/ENA_template_samples_ERC000058.xml index 5556dae..e35e49a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000058.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000058.xml @@ -257,7 +257,7 @@ def mandatorytest(row, column, index): ${row['field name']} - + collection date ${row['collection date']} @@ -270,12 +270,6 @@ def mandatorytest(row, column, index): m - - - geographic location (country and/or sea) - ${row['geographic location (country and/or sea)']} - - geographic location (latitude) @@ -367,6 +361,12 @@ def mandatorytest(row, column, index): ${row['sample storage duration']} + + + geographic location (country and/or sea) + ${row['geographic location (country and/or sea)']} + + host disease status @@ -995,6 +995,14 @@ def mandatorytest(row, column, index): % + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_studies.xml b/ena_upload/templates/ENA_template_studies.xml index 374e743..250f051 100755 --- a/ena_upload/templates/ENA_template_studies.xml +++ b/ena_upload/templates/ENA_template_studies.xml @@ -45,6 +45,14 @@ def mandatorytest(row, column, index): + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff 
--git a/ena_upload/templates/SRA.common.xsd b/ena_upload/templates/SRA.common.xsd index cc04c56..0a61a79 100644 --- a/ena_upload/templates/SRA.common.xsd +++ b/ena_upload/templates/SRA.common.xsd @@ -967,6 +967,7 @@ + diff --git a/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml b/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml new file mode 100644 index 0000000..54d3ec4 --- /dev/null +++ b/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml @@ -0,0 +1,7 @@ + +{%- for value in attributes %} +{%- if value != '' %} +{{value}} +{%- endif %} +{%- endfor %} + diff --git a/ena_upload/templates/jinja_templates/ENA_template_samples.xml b/ena_upload/templates/jinja_templates/ENA_template_samples.xml index 8b8e978..a3b5862 100755 --- a/ena_upload/templates/jinja_templates/ENA_template_samples.xml +++ b/ena_upload/templates/jinja_templates/ENA_template_samples.xml @@ -48,6 +48,14 @@ def mandatorytest(row, column, index): {%- endfor %} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/example_tables/ENA_template_samples_xtra_attrs.tsv b/example_tables/ENA_template_samples_xtra_attrs.tsv new file mode 100644 index 0000000..f0397c8 --- /dev/null +++ b/example_tables/ENA_template_samples_xtra_attrs.tsv @@ -0,0 +1,3 @@ +alias title scientific_name sample_description collection date geographic location (country and/or sea) sample_attribute[age] sample_attribute[treatment] +sample_alias_4 sample_title_1 homo sapiens sample_description_1 2020-10-11 Argentina 2 days treated +sample_alias_5 sample_title_2 human metagenome sample_description_2 2008-01-24 Belgium 2 days untreated diff --git a/var/xml_converter.py b/var/xml_converter.py old mode 100644 new mode 100755 index 7016b4b..15e0eba --- a/var/xml_converter.py +++ b/var/xml_converter.py @@ -34,6 +34,9 @@ def fetching_checklists(): def main(): + is_test = False + export_path_prefix = 'tests/' if is_test else '' + for response_object in fetching_checklists(): 
checklist = response_object['accession'] print(f"Parsing {checklist}") @@ -74,7 +77,8 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # Saving new xml template file - with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: + + with open(f"{export_path_prefix}ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: fh.write(output_from_parsed_template.encode('utf-8')) diff --git a/var/xsd_converter.py b/var/xsd_converter.py old mode 100644 new mode 100755 index 74b805c..aa199f4 --- a/var/xsd_converter.py +++ b/var/xsd_converter.py @@ -1,3 +1,6 @@ +import argparse +import os + from lxml import etree from jinja2 import Environment, FileSystemLoader import requests @@ -5,6 +8,9 @@ from urllib3.util.retry import Retry import time +from ena_upload.ena_upload import SmartFormatter + + def fetch_object(url): """ Fetch single BrAPI object by path @@ -71,9 +77,14 @@ def findkeys(node, query): for j in node.values(): for x in findkeys(j, query): yield x - + + def main(): - mapping = { "run":["FILE"], "experiment":["LIBRARY_SELECTION", "LIBRARY_SOURCE", "LIBRARY_STRATEGY"], "common":["PLATFORM"]} + # turn to True to export in tests folder + is_test = False + export_path_prefix = 'tests/' if is_test else '' + + mapping = { "run":["FILE", "READ_TYPE"], "experiment":["LIBRARY_SELECTION", "LIBRARY_SOURCE", "LIBRARY_STRATEGY"], "common":["PLATFORM"]} template_names= ["ENA.project", "SRA.common", "SRA.experiment", "SRA.run", "SRA.sample", "SRA.study", "SRA.submission"] for template_name in template_names: @@ -83,7 +94,11 @@ def main(): url = f"https://raw.githubusercontent.com/enasequence/webin-xml/master/src/main/resources/uk/ac/ebi/ena/sra/schema/{template_name}.xsd" response = fetch_object(url) - open(f'ena_upload/templates/{template_name}.xsd', 'wb').write(response) + if is_test: + os.makedirs(f'{export_path_prefix}ena_upload/templates', exist_ok=True) + 
open(f'{export_path_prefix}ena_upload/templates/{template_name}.xsd', 'wb').write(response) + else: + open(f'ena_upload/templates/{template_name}.xsd', 'wb').write(response) if template_name_sm in mapping.keys(): @@ -102,6 +117,9 @@ def main(): if template_block == "FILE": query_dict = (list(findkeys(xsd_dict, 'filetype')))[0] xml_tree = query_dict['simpleType']['restriction']['enumeration'] + elif template_block == "READ_TYPE": + query_dict = (list(findkeys(xsd_dict, 'READ_TYPE')))[0] + xml_tree = query_dict['simpleType']['restriction']['enumeration'] elif template_block == "LIBRARY_SELECTION": query_dict = (list(findkeys(xsd_dict, 'typeLibrarySelection')))[0] xml_tree = query_dict['restriction']['enumeration'] @@ -120,8 +138,7 @@ def main(): else: break - - + print(f"Parsed values: {xml_tree}") # Loading the xml jinja2 template for samples @@ -131,11 +148,10 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # Saving new xml template file - with open(f"ena_upload/templates/ENA_template_{template_block}.xml", "w") as fh: + with open(f"{export_path_prefix}ena_upload/templates/ENA_template_{template_block}.xml", "w") as fh: fh.write(output_from_parsed_template) - if __name__ == "__main__": main()