Skip to content

Commit

Permalink
Merge pull request #370 from nextstrain/export_auth
Browse files Browse the repository at this point in the history
Reduce No Author Errors
  • Loading branch information
rneher authored Sep 5, 2019
2 parents f419898 + 7e953e2 commit 3435007
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 1 deletion.
5 changes: 4 additions & 1 deletion augur/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,10 @@ def construct_author_info_nexflu(metadata, tree, nodes):
authorsInTree.add(nodes[node.name]["authors"])

author_info = defaultdict(lambda: {"n": 0})
no_authors = 0
for strain, data in metadata.items():
if "authors" not in data:
print("Error - {} had no authors".format(strain))
no_authors += 1
continue
if data["authors"] not in authorsInTree:
continue
Expand All @@ -286,6 +287,8 @@ def construct_author_info_nexflu(metadata, tree, nodes):
if attr in author_info[authors] and data[attr].strip() != author_info[authors][attr].strip():
print("Error - {} had contradictory {}(s): {} vs {}".format(authors, attr, data[attr], author_info[authors][attr]))
author_info[authors][attr] = data[attr].strip()
if no_authors:
print("Warning - {} samples did not have author information.".format(no_authors))

return author_info

Expand Down
36 changes: 36 additions & 0 deletions tests/builds/tb/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ rule all:
input:
auspice_tree = "auspice/tb_tree.json",
auspice_meta = "auspice/tb_meta.json",
auspice_noauth_tree = "auspice/tb-noauth_tree.json",
auspice_noauth_meta = "auspice/tb-noauth_meta.json",
auspice_seq = "auspice/tb_seq.json",
auspice_seq_v2 = "auspice/tb_seq_v2.json",
auspice_main = "auspice/tb.json"
Expand All @@ -13,6 +15,7 @@ rule config:
params:
seq = "data/tb.vcf.gz",
meta = "data/meta.tsv",
no_auth_meta = "data/meta-noAuth.tsv",
exclude = "data/dropped_strains.txt",
mask = "data/Locus_to_exclude_Mtb.bed",
ref = "data/ref.fasta",
Expand All @@ -22,6 +25,7 @@ rule config:
genes = "data/genes.txt",
colors = "data/color.tsv",
config = "data/config.json",
no_auth_config = "data/no-auth-config.json",
geo_info = "data/lat_longs.tsv",
clades = "data/clades.tsv"

Expand Down Expand Up @@ -209,6 +213,38 @@ rule export:
augur validate --json {output.tree} {output.meta}
"""

# Second nextflu-schema export of the same build, run against the alternate
# metadata table (config.no_auth_meta) and the alternate auspice config
# (config.no_auth_config). Judging by the file names these are the variants
# without author information -- NOTE(review): confirm against the data files.
# The tree and all node-data inputs are taken from the upstream rules' outputs
# (refine, traits, ancestral, translate, clades); only the metadata and the
# auspice config are specific to this rule.
rule no_auth_export:
    message: "Exporting data files for auspice using nextflu compatible schema"
    input:
        tree = rules.refine.output.tree,
        metadata = config.no_auth_meta,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output,
        nt_muts = rules.ancestral.output.nt_data,
        aa_muts = rules.translate.output.aa_data,
        color_defs = config.colors,
        config = config.no_auth_config,
        geo_info = config.geo_info,
        clades = rules.clades.output.clade_data,
        ref = config.ref,
        translations = rules.translate.output.fasta_out
    output:
        # Paths are declared once in `rule all` and referenced from there so
        # the target list and this rule cannot drift apart.
        tree = rules.all.input.auspice_noauth_tree,
        meta = rules.all.input.auspice_noauth_meta
    # Two commands: write the tree/meta JSONs with `augur export`, then check
    # the pair with `augur validate --json`.
    shell:
        """
        augur export \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --reference {input.ref} --reference-translations {input.translations} \
            --node-data {input.branch_lengths} {input.traits} {input.aa_muts} {input.nt_muts} {input.clades} \
            --auspice-config {input.config} \
            --colors {input.color_defs} \
            --lat-longs {input.geo_info} \
            --output-tree {output.tree} \
            --output-meta {output.meta}
        augur validate --json {output.tree} {output.meta}
        """

rule exportv2:
message: "Exporting data files for for auspice using nextstrain schema v2"
Expand Down
Loading

0 comments on commit 3435007

Please sign in to comment.