-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconfig.yml
176 lines (171 loc) · 4.51 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
---
# Common parameters.
# `n_cores` is anchored as `cores`; the `*cores` aliases further down reuse it.
n_cores: &cores 3

# Ordered mapping (!!omap) giving the order in which the steps are run.
# Entries that are commented out are not part of the sequence.
run_sequence: !!omap
  - 1: 'basic_check'
  - 2: 'fill_data'
  - 3: 'infer_build'
  - 4: 'harmonize'
  # - 5: 'report_inflation_factors'
  # - 6: 'qq_manhattan_plots'
  - 7: 'write_pickle'
  # - 8: 'write_ldsc'
  # - 9: 'write_metal'
  - 10: 'write_regenie'
  - 11: 'sort_alphabetically'
# Per-step configuration, keyed by the step names listed in `run_sequence`.
# Every step has a `params` mapping with a `run` flag (plus, for some steps,
# `workspace` / `workspace_subfolder` / `overwrite` settings); many also have
# a `gl_params` mapping.
# NOTE(review): `gl_params` presumably holds the keyword arguments forwarded
# to the matching gwaslab call (see the per-step links) - confirm in the
# pipeline code.
# NOTE(review): keys that appear between `params:` and `gl_params:` are placed
# under `params`, and keys after `gl_params:` under `gl_params` - confirm
# borderline keys such as `dist`, `flip_allele` and `study_name_mask`.
steps:
  basic_check:  # see https://cloufield.github.io/gwaslab/Standardization/
    params:
      run: True
    gl_params:
      normalize: True
      n_cores: *cores

  infer_build:  # see https://cloufield.github.io/gwaslab/InferBuild/
    params:
      run: True

  fill_data:  # see https://cloufield.github.io/gwaslab/Conversion/#fill_data
    params:
      run: True
    gl_params:
      to_fill: ['MLOG10P', 'Z']
      overwrite: False
      extreme: False

  harmonize:  # see https://cloufield.github.io/gwaslab/Harmonization/
    params:
      run: True
      preload_cache: True
      flip_allele: False
    gl_params:
      basic_check: False
      n_cores: *cores
      ref_seq: "data/GCA_000001405.14_GRCh37.p13_full_analysis_set.fna"
      ref_infer: "data/EUR.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz"
      ref_alt_freq: "AF"
      remove: True
      removedup_args:
        mode: "d"
        keep: 'first'
        keep_col: "MLOG10P"
      # if use_cache is set to False but preload_cache is True, the
      # (preloaded) cache will be used anyway
      inferstrand_args:
        cache_options:
          use_cache: True

  liftover:  # see https://cloufield.github.io/gwaslab/LiftOver/
    params:
      run: False
    gl_params:
      n_cores: *cores
      from_build: "19"
      to_build: "38"
      remove: True

  report_summary:
    params:
      run: False

  report_inflation_factors:
    params:
      run: True
      overwrite: False
      workspace: "if"
      workspace_subfolder: True

  report_min_pvalue:
    params:
      run: False
      workspace: "min_P"
      workspace_subfolder: True
      nrows: 20

  qq_manhattan_plots:
    params:
      run: True
      workspace: "plots"
      dist: 2
    gl_params:
      mode: 'mqq'
      stratified: True
      maf_bins: [[0, 0.01], [0.01, 0.05], [0.05, 1]]
      scaled: True
      build: "19"
      skip: 1
      # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
      # resolve a bare `5e-8` to the string "5e-8" instead of a float.
      sig_level: 5.0e-8

  cistrans_annotation:
    params:
      run: False
      workspace: "cistrans_tag"
    gl_params:
      target_gene_map_file_dir: "data/target_gene_mapping_file_v9-12-23.csv"
      map_file_chrom_col: "chromosome"
      map_file_start_col: "start"
      map_file_end_col: "end"
      map_file_gene_name: "Entrez_Gene_Name"
      gene_pos_file_dir: "data/glist-hg19.txt"
      genepos_file_genecol_name: "GENE_NAME"
      gwas_files_dir: "path/to/cojo/or/clump/files"
      prot_id_column_name: "SeqId"
      pval_column: "MLOG10P"  # change with actual name of -log10p column
      snp_file_pos_col: "POS"
      snp_file_chrom_col: "#CHROM"
      pvalue_threshold: 7.30102  # -log10 of 5e-8
      region_buffer_value: 500000
      proximality_threshold: 5000
      columns_to_drop: []

  write_pickle:
    params:
      run: True
      overwrite: False
      workspace: "pickle"
      workspace_subfolder: False

  write_same_input_format:
    params:
      run: False
      workspace: "outputs"
      workspace_subfolder: True
    gl_params:
      to_csvargs:
        compression:
          method: "gzip"
          compresslevel: 1
          mtime: 1

  write_tsv:
    params:
      run: False
      workspace: "outputs"
      workspace_subfolder: True
    gl_params:
      fmt: "gwaslab"
      # see https://cloufield.github.io/gwaslab/Format/ for more details on
      # writing options

  write_ldsc:
    params:
      run: False
      workspace: "ldsc"
      workspace_subfolder: True
    gl_params:
      fmt: "ldsc"

  write_metal:
    params:
      run: True
      workspace: "metal"
      workspace_subfolder: True
    gl_params:
      fmt: "metal"

  write_regenie:
    params:
      run: True
      workspace: "regenie"
      workspace_subfolder: True
    gl_params:
      fmt: "regenie"

  write_vcf:
    params:
      run: True
      workspace: "vcf"
      workspace_subfolder: True
      study_name_mask: [True, True, True, False, False, False]
      study_name_sep: '.'
    gl_params:
      fmt: "vcf"

  sort_alphabetically:
    params:
      run: True
    gl_params:
      n_cores: *cores
# Filename transformation (example filename: seq.3007.7.gwas.regenie.gz).
# NOTE(review): presumably the name is split on `filename_sep` and the mask
# marks, position by position, which parts are kept - confirm in the
# pipeline code.
filename_mask: [True, True, True, False, False, False]
filename_sep: '.'

# IO
root_path: "results"
log_filename: "gwaspipe.log"
# Fill this in to provide a custom formatbook.
# NOTE(review): the default is a single space, not an empty string - confirm
# the consumer treats it as "not set".
formatbook_path: " "