Skip to content

Commit

Permalink
git clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
Sophie - Luise Heidig authored and Sophie - Luise Heidig committed Mar 21, 2024
1 parent 98cd53a commit 6d9d028
Show file tree
Hide file tree
Showing 9 changed files with 12 additions and 125 deletions.
17 changes: 5 additions & 12 deletions bin/map_dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def handle_unmappable(df):
# Iterate over rows where df.same == False
for index, row in df[df['same'] == False].iterrows():
if type(row['model']) == str :
print(row)
#print(row)
fixed=False
model_seq = row['model']
unali_seq = row['unali']
dssp = row['dssp']

print(unali_seq)
print(model_seq)
print(dssp)
#print(unali_seq)
#print(model_seq)
#print(dssp)

if len(model_seq)==len(unali_seq): #unmatching is because of point mutations, we ignore point mutations
counter = 0
Expand All @@ -66,7 +66,7 @@ def handle_unmappable(df):
else: #the sequence of the entry and the sequence of the 3D structure/model have different lengths
# align
alignments = pairwise2.align.globalms(model_seq, unali_seq,2, -1, -10, -0.1)
print(alignments)
#print(alignments)
aligned_model_seq = alignments[0][0]
aligned_unali_seq = alignments[0][1]

Expand Down Expand Up @@ -207,13 +207,6 @@ def write_fasta_from_df(df,outname):
line = df.iloc[i,0]+"_dssp" + "\n"+ str(df.iloc[i,3])
lines.append(line)

#line = df.iloc[i,0]+"_seq_inputted" + "\n"+ df.iloc[i,1]
#lines.append(line)
#line = df.iloc[i,0]+"_seq_model" + "\n"+ df.iloc[i,2]
#lines.append(line)
#line = df.iloc[i,0]+"_dssp" + "\n"+ df.iloc[i,3]
#lines.append(line)

with open (out_name, 'w') as m:
for line in lines:
line = line.replace('\\n','\n').replace(' ','')
Expand Down
65 changes: 0 additions & 65 deletions bin/shannons_entropy.py

This file was deleted.

8 changes: 5 additions & 3 deletions magic_align.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@ data=toy_example
house=$(pwd)
now=`date +"%Y_%m_%d_%H_%M_%S"`
output_name=${data}_${now}_test
output_folder=$house/results/$output_name
output_folder=$house/$data/results/$output_name

mkdir -p $house/results/
mkdir -p $output_folder
echo 'Starting nextflow'

nextflow run simsapiper.nf \
-profile server,withsingularity \
-profile standard,withdocker \
--data $house/$data/data \
--magic \
--outFolder $output_folder \
|& tee $output_folder/run_report_$output_name.nflog
| tee $output_folder/run_report_$output_name.nflog
sessionName=$(sed -n '2s/.*\[\(.*\)\].*/\1/p' $output_folder/run_report_$output_name.nflog)
nextflow log | grep $sessionName >> $output_folder/run_report_$output_name.nflog

2 changes: 1 addition & 1 deletion magic_hydra.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

data=toy_example

module load Nextflow/23.04.2
module load Nextflow/23.10.0
house=$VSC_SCRATCH_VO_USER/simsapiper
now=`date +"%Y_%m_%d_%H_%M_%S"`
output_name=${data}_${now}_test
Expand Down
2 changes: 0 additions & 2 deletions modules/msas.nf
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,6 @@ process squeeze{
"""
python3 $projectDir/bin/squeeze_msa.py $msa $dssp "$squeeze" $squeezePerc squeezed_${msa.baseName}
"""
//INFO:
//choose conserved secondary structure elements according to dssp across your dataset, as it is those elements that TCOFFEE will use to align your proteins
}

process reorder{
Expand Down
9 changes: 0 additions & 9 deletions modules/structures.nf
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,11 @@ process runDssp{
echo Gate is open $gate
mkdssp -i $model -o ${model.baseName}.dssp
"""
//INFO: secondary structure elements according to dssp
//H = alpha-helix
//B = beta-bridge residue
//E = extended strand (in beta ladder)
//G = 3/10-helix
//I = 5-helix
//T = H-bonded turn
//S = beta-bend or beta-turn
}


process esmFolds{
publishDir "$params.structures", mode: "copy"
//errorStrategy { task.attempt > 3 ? 'retry' : 'complete' }

input:
path structureless
Expand Down
26 changes: 1 addition & 25 deletions modules/utils.nf
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@ process attendance{
echo 'No. of sequences in final alignment: ' \$fin >> "sequence_report.txt"
av_conservation=`python3 $projectDir/bin/shannons_entropy.py $finalMsa`
echo 'Average sequence conservation (Shannons Entropy): ' \$av_conservation
if (( \$fin !=$collapsedSequencesCount + $structurelessCount ))
then
echo "ERROR: Not all valid sequences are found in the output file, please check $finalMsa in in the output directory!"
Expand Down Expand Up @@ -315,25 +311,5 @@ process createSummary{
cp .command.out \$outfile
"""

// add new dependency
//python3 $projectDir/bin/sequence_sim.py $finalMSA


//av_conservation=`python3 $projectDir/bin/shannons_entropy.py $finalMsa`
// echo 'Average sequence conservation (Shannons Entropy): ' \$av_conservation

//md improvements
//echo \$(readlink -f $seqsInvalidFile)
//inputSeqFilePath=\$(readlink -f $inputSeqFiles )
//echo '[$inputSeqFiles]('\$inputSeqFilePath')'

//[link](file:///Users/matb/Desktop/cat.gif)
//echo '<a href="file://'\$inputSeqFilesPath'">link</a>'

//inputSeqFilesPath=\$(readlink -f $inputSeqFiles)
//echo '\n SIMSApiper found these files: ![$inputSeqFiles](file://'\$inputSeqFilesPath')'


"""
}
3 changes: 0 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@ if (params.squeeze){params.dssp = true}

//minimal parameter value should be 1
//if (params.localModel){params.localModel = 1}
//type test localModel
//if (params.localModel) {assert Number.isCase(params.useSubsets), " localModel can only be 'false' or a number, please check your launch file or command line"}


report {
enabled = true
Expand Down
5 changes: 0 additions & 5 deletions simsapiper.nf
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,6 @@ workflow {
seqs_to_model = writeFastaFromMissing.out.found

esmFolds(seqs_to_model)
//if seqs_to_model is empty, the pipeline does not complete, but if it is not empty, strucQC needs to wait for esm?
//esmStructuresCounter= Channel.fromPath("$params.structures/*.pdb").count()
//this does not work as a gate


foundSequencesCount = finalModelFound.mix(esmFolds.out.esmFoldsStructures).count()

structureless_seqs=Channel.empty()
Expand Down

0 comments on commit 6d9d028

Please sign in to comment.