diff --git a/tools/anndata/export.xml b/tools/anndata/export.xml index aab4d7cc26f..52d8896e572 100644 --- a/tools/anndata/export.xml +++ b/tools/anndata/export.xml @@ -1,90 +1,33 @@ - - Interconvert AnnData and Loom formats + + matrix and annotations macros.xml - - - - - - - - - - - - - - - - + - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'loom' - - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'tabular' - - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'tabular' - - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'tabular' - - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'tabular' - - - hd5_format['filetype'] == 'anndata' and hd5_format['output_format'] == 'tabular' - - - hd5_format['filetype'] == 'loom' - - - - hd5_format['filetype'] == 'loom' - - + + + + + - - - - - - - - - - - @@ -94,26 +37,9 @@ adata.write_csvs('.', sep="\t", skip_data = False) - - - - - - - - - - - - - `__) -or a Tabular file (`write_csvs method `__) - -It can also create a series of tabular files from an input loom dataset. 
+This tool exports an AnnData dataset to a tabular files (`write_csvs method `__) @HELP@ ]]> diff --git a/tools/anndata/import.xml b/tools/anndata/import.xml index 8d3ce3692d4..99ab2808a68 100644 --- a/tools/anndata/import.xml +++ b/tools/anndata/import.xml @@ -1,9 +1,9 @@ - - from different format + + from different formats macros.xml - + @@ -13,164 +13,132 @@ - scanpy + scanpy umi_tools_input.gz - && +#if $in.adata_format == 'mtx' + mkdir mtx + #if $in.tenx.use == 'legacy_10x' + && cp '$in.matrix' 'mtx/matrix.mtx' + && cp '$in.tenx.genes' 'mtx/genes.tsv' + && cp '$in.tenx.barcodes' 'mtx/barcodes.tsv' + #else if $in.tenx.use == 'v3_10x' + && cp '$in.matrix' 'mtx/matrix.mtx' + && gzip 'mtx/matrix.mtx' + && cp '$in.tenx.features' 'mtx/features.tsv' + && gzip 'mtx/features.tsv' + && cp '$in.tenx.barcodes' 'mtx/barcodes.tsv' + && gzip 'mtx/barcodes.tsv' #end if - - @CMD@ - - #if $hd5_format.in.adata_format == 'mtx' - && rm -rf mtx - #end if - -#else: - python '$__tool_directory__/tsv_to_loompy.py' - -c '${hd5_format.coldata}' - -r '${hd5_format.rowdata}' - -f '${hd5_format.mainmatrix}' - #if $hd5_format.other_files: - '${hd5_format.other_files}' - #end if + && +#else if $in.adata_format == 'umi_tools' + ## avoid gzipping in the inputdir + gzip -c '$in.input' > umi_tools_input.gz + && #end if + +@CMD@ ]]> - - - - + + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + + + - - - - - - - - - - - - - - - - - - - - - + + + - - - - - + + - - hd5_format['filetype'] == 'anndata' - - - hd5_format['filetype'] == 'loom' - + @@ -191,7 +159,13 @@ adata.write('anndata.h5ad') - + + + + + + + @@ -203,8 +177,8 @@ adata.write('anndata.h5ad') + - @@ -216,8 +190,8 @@ adata.write('anndata.h5ad') + - @@ -226,8 +200,10 @@ adata.write('anndata.h5ad') - - + + + + @@ -242,7 +218,14 @@ adata.write('anndata.h5ad') - + + + + + + + + @@ -257,31 +240,29 @@ adata.write('anndata.h5ad') - + + + + + + + + + - - - - - - - - - - + + + - - - - - - + + + @@ 
-296,7 +277,7 @@ adata.write('anndata.h5ad') **What it does** -This tool creates an AnnData or loom dataset from several input types: +This tool creates an AnnData from several input types: - Loom (`read_loom method `__) - Tabular (`read_csv method `__) diff --git a/tools/anndata/inspect.xml b/tools/anndata/inspect.xml index 5b4b700ec72..87fed9e8a35 100644 --- a/tools/anndata/inspect.xml +++ b/tools/anndata/inspect.xml @@ -1,4 +1,4 @@ - + object macros.xml @@ -283,7 +283,7 @@ pd.DataFrame(adata.varm['PCs']).to_csv("$varm_PCs", sep="\t", index = False) - + @@ -305,14 +305,14 @@ pd.DataFrame(adata.varm['PCs']).to_csv("$varm_PCs", sep="\t", index = False) - + - + diff --git a/tools/anndata/macros.xml b/tools/anndata/macros.xml index 69494861dcd..fdeef099049 100644 --- a/tools/anndata/macros.xml +++ b/tools/anndata/macros.xml @@ -1,6 +1,7 @@ - 0.10.3 + 0.10.9 0 + 21.09 anndata @@ -19,12 +20,6 @@ - - + + + + + + + diff --git a/tools/anndata/manipulate.xml b/tools/anndata/manipulate.xml index 0534748c3b4..0f8d9153018 100644 --- a/tools/anndata/manipulate.xml +++ b/tools/anndata/manipulate.xml @@ -1,4 +1,4 @@ - + object macros.xml @@ -50,6 +50,27 @@ adata.rename_categories( key='$manipulate.key', categories=$categories) +#else if $manipulate.function == 'remove_keys' + #if $manipulate.obs_keys + #set $keys = [x.strip() for x in str($manipulate.obs_keys).split(',')] +adata.obs = adata.obs.drop(columns=$keys) + #end if + + #if $manipulate.var_keys + #set $keys = [x.strip() for x in str($manipulate.var_keys).split(',')] +adata.var = adata.var.drop(columns=$keys) + #end if + +#else if $manipulate.function == 'flag_genes' +## adapted from anndata operations + #for $flag in $manipulate.gene_flags +k_cat = adata.var_names.str.startswith('${flag.startswith}') +if k_cat.sum() > 0: + adata.var['${flag.col_name}'] = k_cat +else: + print(f"No genes starting with {'${flag.startswith}'} found.") +#end for + #else if $manipulate.function == 'strings_to_categoricals' 
adata.strings_to_categoricals() @@ -71,6 +92,14 @@ obs.index = obs_index adata.obs = obs #end if +#else if $manipulate.function == 'split_on_obs' +import os +res_dir = "output_split" +os.makedirs(res_dir, exist_ok=True) +for s,field_value in enumerate(adata.obs["${manipulate.key}"].unique()): + ad_s = adata[adata.obs.${manipulate.key} == field_value] + ad_s.write(f"{res_dir}/${manipulate.key}_{s}.h5ad", compression='gzip') + #else if $manipulate.function == 'filter' #if $manipulate.filter.filter == 'key' #if $manipulate.var_obs == 'var' @@ -126,7 +155,11 @@ adata.raw = adata #end if -adata.write('anndata.h5ad') +#if $manipulate.function != 'split_on_obs' +adata.write('anndata.h5ad', compression='gzip') +print(adata) +#end if + ]]> @@ -137,9 +170,12 @@ adata.write('anndata.h5ad') + + + @@ -167,6 +203,26 @@ adata.write('anndata.h5ad') + + + + + + + + + + + + + + + + + + + + @@ -177,6 +233,15 @@ adata.write('anndata.h5ad') + + + + + + + + + @@ -237,10 +302,16 @@ adata.write('anndata.h5ad') - + + manipulate['function'] != 'split_on_obs' + + + + manipulate['function'] == 'split_on_obs' + - + @@ -256,10 +327,15 @@ adata.write('anndata.h5ad') + - + + + + + - + @@ -268,10 +344,17 @@ adata.write('anndata.h5ad') + - + + + + + + + - + @@ -280,25 +363,39 @@ adata.write('anndata.h5ad') + - + + + + + + + - + - + - + + - + + + + + + + - + @@ -306,10 +403,17 @@ adata.write('anndata.h5ad') + - + + + + + + + - + @@ -317,10 +421,17 @@ adata.write('anndata.h5ad') + - + + + + + + + - + @@ -328,9 +439,20 @@ adata.write('anndata.h5ad') - + + + + + + + + + + + + - + @@ -338,9 +460,20 @@ adata.write('anndata.h5ad') - + + + + + + + + + + + + - + @@ -354,9 +487,18 @@ adata.write('anndata.h5ad') - + + + + + + + + + + - + @@ -372,23 +514,120 @@ adata.write('anndata.h5ad') - + + + + + + + + + + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `__) @@ -408,6 
+647,14 @@ The possible manipulations are: Besides calling `self.obs[key].cat.categories = categories` - similar for `var` - this also renames categories in unstructured annotation that uses the categorical annotation `key` +- Remove keys from obs or var annotations + + Helps in cleaning up AnnData with many annotations. For example, helps in removing QC metrics calculated during the preprocessing or already existing cluster annotations. + +- Flag genes that start with a pattern + + Useful for flagging the mitochondrial or ribosomal protein genes + - Transform string annotations to categoricals (`strings_to_categoricals method `__) Only affects string annotations that lead to less categories than the total number of observations. @@ -416,7 +663,11 @@ The possible manipulations are: Data matrix is transposed, observations and variables are interchanged. -- Add annotation for variables or observations +- Add annotation for variables or observations + +- Split the AnnData object into multiple AnnData objects based on the values of a given obs key + + For example, helps in splitting an AnnData object based on cluster annotation. This function generates a collection with a number of elements equal to the number of categories in the input obs key. 
- Filter data variables or observations, by index or key diff --git a/tools/anndata/modify_loom.xml b/tools/anndata/modify_loom.xml index 776e890d74b..96e654836d0 100644 --- a/tools/anndata/modify_loom.xml +++ b/tools/anndata/modify_loom.xml @@ -1,62 +1,191 @@ - - Add layers, or row/column attributes to a loom file + + Manipulate, export and import loom data macros.xml - + + numpy + + + + - - - - - - + + + + + - - + + + + + + + + + + + + + + + + + + - - + + - - + + + + + + + + + + + + + + + + + - + + operation['to_perform'] == 'manipulate' or operation['to_perform'] == 'import' + + + operation['to_perform'] == 'export' + + + + operation['to_perform'] == 'export' + + - - - + + + + + + + + - - - + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +