-
Notifications
You must be signed in to change notification settings - Fork 20
/
log.txt
127 lines (114 loc) · 8.99 KB
/
log.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# (disabled) extend all off-targets: add 40 bases on each side
# NOTE(review): the disabled gawk command shifts the start by 40 but the end by 37 - confirm before re-enabling
#rm -rf extBed/*
#for i in *.bed; do cat $i | gawk '{OFS="\t"; $2-=40; $3+=37; print}' > extBed/$i; done
# Merge all per-study convert.tab files into offtargets.tsv:
# the contents of origData/headers.txt come first, followed by every study row
# sorted by column 1 and then by column 3 (general-numeric, descending).
{
  cat origData/headers.txt
  cat origData/*/convert.tab \
    | sort -k1,1 -k3,3gr
} > offtargets.tsv
# Summary counts over offtargets.tsv. The figures in the comments are the values
# recorded when the commands were originally run.
#
# count by study
# (lines containing "name" are dropped; the text before the first "_" is the key)
grep -v name offtargets.tsv | cut -d_ -f1 | tabUniq -rs
# count strong by study (column 3 > 0.01)
awk '($3>0.01)' offtargets.tsv | grep -v name | cut -d_ -f1 | tabUniq -rs
#
# number of tested guide sequences: 30
# (distinct values of column 2 among the on-target rows)
grep on-target offtargets.tsv | grep -v ^name | cut -f2 | sort -u | wc -l
# number of assays: 36
cut -f1 offtargets.tsv | grep -v name | sort -u | wc -l
# number of studies: 7
cut -f1 offtargets.tsv | grep -v name | cut -d_ -f1 | cut -d/ -f1 | sort -u | wc -l
# number of off-targets: 697
grep -v name offtargets.tsv | grep -v on-target | wc -l
# number of strong off-targets: 160
# (on-target rows are excluded by the `grep -v on-target` stage)
grep -v name offtargets.tsv | grep -v on-target | awk '($3>0.01)' | wc -l
# number of off-targets falling on the two outliers: 284
grep -v ^name offtargets.tsv | grep -v on-target | grep -E '_site2|sgRNA4' | wc -l
# fraction of all off-targets that fall on the two outliers
calc 284/697
# filter out two GC rich
# (every row matching one of the four names in the pattern below is removed)
#less offtargets.tsv | egrep -v '_site2|sgRNA4|Frock_RAG1A|Frock_VEGFA' > offtargetsFilt.tsv
grep -E -v '_site2|sgRNA4|Frock_VEGFA|Frock_RAG1A' offtargets.tsv > out/offtargetsFilt.tsv
# number of filtered off-targets (header and on-target rows excluded),
# counted from the file that was just written
grep -E -v ^name out/offtargetsFilt.tsv | grep -v on-target | wc -l
# NOTE(review): the filtered table is written to out/ above, but the two counts
# below read offtargetsFilt.tsv from the current directory - confirm that both
# paths refer to the same data.
# number of filtered off-target with > 0.1% : 211
grep -E -v ^name offtargetsFilt.tsv | grep -v on-target | awk '($3>0.001)' | wc -l
# number of filtered off-target with > 1% : 107
grep -E -v ^name offtargetsFilt.tsv | grep -v on-target | awk '($3>0.01)' | wc -l
# SECTION alternative PAMs
# cat offtargetsFilt.tsv | grep -v on- | awk '($3>0.01)' | awk '($2!~/GG$/)' | wc -l
# number of strong filtered off-targets
grep -v on- offtargetsFilt.tsv \
  | awk '($3>0.01)' \
  | wc -l
# number of strong filtered off-targets with main PAMs
# (column 2 ends in AG, GG or GA)
grep -v ^name offtargetsFilt.tsv \
  | grep -v on- \
  | awk '($3>0.01) && (($2~/AG$/) || ($2~/GG$/) || ($2~/GA$/))' \
  | wc -l
# filtered off-targets with main PAMs
grep -v ^name offtargetsFilt.tsv \
  | grep -v on- \
  | awk '($3>0.001) && (($2~/AG$/) || ($2~/GG$/) || ($2~/GA$/))' \
  | wc -l
# any off-target with non-NGG/NGA/NAG PAM
grep -v ^name offtargetsFilt.tsv \
  | grep -v on- \
  | awk '(!(($2~/AG$/) || ($2~/GG$/) || ($2~/GA$/)))' \
  | wc -l
echo "alternative PAMs require better matches"
echo "mismatch comparison"
# For each PAM group (column 3 ending in GG, then AG/GA) print three numbers:
# rows with column 6 <= 3, all rows in the group, and their ratio.
awk '($3~/GG$/) { if ($6<=3) lt3+=1; all+=1 } END {print lt3, all, lt3/all}' \
  annotFiltOfftargets.tsv
awk '($3~/AG$/ || $3~/GA$/) { if ($6<=3) lt3+=1; all+=1 } END {print lt3, all, lt3/all}' \
  annotFiltOfftargets.tsv
echo "off-target score comparison"
# Same layout, but counting rows whose column 7 exceeds 1.0.
awk '($3~/GG$/) { if ($7>1.0) gt1+=1; all+=1 } END {print gt1, all, gt1/all}' \
  annotFiltOfftargets.tsv
awk '($3~/AG$/ || $3~/GA$/) { if ($7>1.0) gt1+=1; all+=1 } END {print gt1, all, gt1/all}' \
  annotFiltOfftargets.tsv
#
# get the predicted targets
# Runs crispor.cgi on hg19 once per FASTA file in fa/. For fa/NAME.fa the second
# output path is guides/NAME.tsv and the -o path is crisporOfftargets/NAME.tsv.
for i in fa/*.fa; do
  # an unmatched glob is left as the literal pattern; skip it instead of
  # launching the tool on a file that does not exist
  [ -e "$i" ] || continue
  base=$(basename "$i" .fa)
  /var/www/crisporMax/crispor.cgi hg19 "$i" "guides/$base.tsv" \
    -o "crisporOfftargets/$base.tsv" \
    --minAltPamScore=0.0 -d --maxOcc=1000000
done
# alternative PAMs with more than 10% read share
# (column 3 does not end in GG and column 5 > 0.1)
awk '($3!~/GG$/) && ($5>0.1)' annotFiltOfftargets.tsv
# strong filtered offtargets
awk '($3>0.01)' offtargetsFilt.tsv \
  | grep -v on- \
  | wc -l
# strong filtered offtargets that are alternative PAMs
awk '($3>0.01)' offtargetsFilt.tsv \
  | grep -v on- \
  | awk '($2!~/GG$/)' \
  | wc -l
# rows belonging to the four excluded names: all rows, then without "on-" rows
grep -E '_site2|sgRNA4|Frock_RAG1A|Frock_VEGFA' offtargets.tsv \
  | wc -l
grep -E '_site2|sgRNA4|Frock_RAG1A|Frock_VEGFA' offtargets.tsv \
  | grep -v on- \
  | wc -l
# render the full table as HTML and upload it
tabToHtml offtargets.tsv > offtargets.html
scp offtargets.html dev:public_html/concordet/
# rows with column 3 > 0.01, filtered table vs. full table
awk '($3>0.01)' offtargetsFilt.tsv \
  | grep -v name \
  | wc -l
grep -v name offtargets.tsv \
  | awk '($3>0.01)' \
  | wc -l
# fetch the predicted off-targets from the remote host
rsync -a -v -z -p tefor:offtargets/crisporOfftargets/ ./crisporOfftargets/
# Three views of annotOfftargets.tsv, each with "site2" rows removed.
# All require column 7 >= 5; they differ in the column-6 threshold and in
# whether (column 7 - column 9) is >= 2 or < 2. The last two keep the header row.
awk '($6>=0.00001 && $7>=5 && ($7-$9)>=2)' annotOfftargets.tsv \
  | grep -v site2
awk '($1=="name" || ($6>=0.01 && $7>=5 && ($7-$9)<2))' annotOfftargets.tsv \
  | grep -v site2
awk '($1=="name" || ($6>=0.01 && $7>=5 && ($7-$9)>=2))' annotOfftargets.tsv \
  | grep -v site2
# fetch the gapped predictions from the remote host
rsync -a -v -z -p tefor:offtargets/crisporOfftargetsGapped/ ./crisporOfftargetsGapped/
# keep the header row (column 6 == "mismatches") and rows with column 6 >= 5
awk '($6=="mismatches" || $6>=5)' out/annotFiltOfftargets.tsv > plus5Mismatches.tsv
# (disabled) archive crisporEffScores.py and bin, then upload the archive
#tar cvfz crisporEffScores1.tar.gz crisporEffScores.py bin
#scp crisporEffScores1.tar.gz dev:public_html/crispor/
# Copy analysis outputs from out/ into the article's supplementary-files folder
# under their figure/file names. Order matters: some destinations are written
# more than once in this run, and the last copy wins.
cp out/annotOfftargets.tsv ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile1.tsv
cp out/mitCrisporSensDiff.tsv ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile4.tsv
cp out/compEffScores-valid.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile3.pdf
cp out/gcOtCount.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig1.pdf
# NOTE(review): suppFig1.pdf is overwritten further down by out/gcSpecScores.pdf
cp out/venn.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig1.pdf
# NOTE(review): fig3.pdf is overwritten two lines below by out/roc.pdf
cp out/specScoreMMComp.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig3.pdf
cp out/specScoreMMComp.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig2.pdf
cp out/roc.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig3.pdf
# concatenate all per-dataset score tables into a single supplementary file
tabCat effData/*.scores.tab > ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile2.tsv
# the same specScoreMMComp.pdf is also stored as suppFig3.pdf
cp out/specScoreMMComp.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig3.pdf
cp out/gcSpecScores.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig1.pdf
cp out/specScoreVsOtCount-MIT.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig4.pdf
cp out/heatMap.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig5.pdf
# Convert the danRer10 ensGene table to BED: lines containing "bin" are dropped
# and the first column is removed before genePredToBed reads the rest.
zless ensGene.danRer10.tab.gz \
  | grep -v bin \
  | cut -f2- \
  | genePredToBed stdin ensGene.danRer10.bed
# Same conversion, piped on into bedToExons to write the exon-level BED file.
zless ensGene.danRer10.tab.gz \
  | grep -v bin \
  | cut -f2- \
  | genePredToBed stdin stdout \
  | bedToExons stdin ensGene.exons.danRer10.bed
# For every guide in alenaAll.bed take the closest exon entry, keep columns 4 and
# 10, strip the numeric suffixes from the guide-name prefixes (and a trailing
# ".<digit>"), then tally the pairs and flatten the tallies, using ";" in place of "/".
bedtools closest -a effData/bed/alenaAll.bed -b locusMapping/ensGene.exonSyms.danRer10.bed \
  | cut -f4,10 \
  | sed -e 's/Perrine[0-9]*/Perrine/g' \
        -e 's/Heleneg[0-9]*/Helene/g' \
        -e 's/YAPR[0-9]*/Yuvia/g' \
        -e 's/Antoine[0-9]*/Antoine/g' \
        -e 's/Angelo[0-9]*/Angelo/' \
        -e 's/\.[0-9]//' \
  | tabUniq -s \
  | sed -E 's/ ([0-9]*)$/=\1/' \
  | hashFlatten /dev/stdin \
  | sed -e 's/\//;/g'
# Column 10 of the closest-exon hits for the morenoMateos2015 guides, tallied;
# only the first column of the tally is saved.
bedtools closest -a effData/bed/morenoMateos2015.bed -b locusMapping/ensGene.exonSyms.danRer10.bed \
  | cut -f10 \
  | tabUniq -rs \
  | cut -f1 > /tmp/morenoMateos.txt
# Same alenaAll pipeline as above, viewed in the pager.
bedtools closest -a effData/bed/alenaAll.bed -b locusMapping/ensGene.exonSyms.danRer10.bed \
  | cut -f4,10 \
  | sed -e 's/Perrine[0-9]*/Perrine/g' \
        -e 's/Heleneg[0-9]*/Helene/g' \
        -e 's/YAPR[0-9]*/Yuvia/g' \
        -e 's/Antoine[0-9]*/Antoine/g' \
        -e 's/Angelo[0-9]*/Angelo/' \
        -e 's/\.[0-9]//' \
  | tabUniq -s \
  | sed -E 's/ ([0-9]*)$/=\1/' \
  | hashFlatten /dev/stdin \
  | sed -e 's/\//;/g' \
  | less
# number of guide assays (not counting guides tested in two cell lines)
# (distinct column-1 values of "on-" rows, skipping names with Hap1 or Seq)
grep on- offtargets.tsv \
  | cut -f1 \
  | grep -v -e Hap1 -e Seq \
  | sort -u \
  | wc -l
cp out/mitCrisporSensDiff.tsv ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppTable3.tsv
cp out/venn-EMX1.tsv ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppTable1.tsv
# Tsai_EMX1: rows with column 4 <= 4 in the CRISPOR output vs. line count of the MIT file
awk '($4<=4)' crisporOfftargets/Tsai_EMX1.tsv \
  | wc -l
cat mitOfftargets/Tsai_EMX1.csv \
  | wc -l
#
# number of off-targets, by study
# (rows with a non-zero column 3, without on-target and header rows)
awk '($3!=0)' offtargets.tsv \
  | grep -v -e on-target -e name \
  | cut -d_ -f1 \
  | tabUniq -rs
# NOTE(review): this second tally removes rows containing "off-target" rather
# than "on-target" - confirm that the per-study on-target count is what is wanted
grep -v -e off-target -e name offtargets.tsv \
  | cut -d_ -f1 \
  | tabUniq -rs
# page through annotated rows where tab-separated column 12 or 15 is non-empty
awk -F '\t' '($12!="" || $15!="")' out/annotOfftargets.tsv \
  | less
# Refresh the article's figure/supplement folder from out/ and render PNG
# versions of the PDFs with ImageMagick. Each PNG is written next to its PDF
# under the same base name.
cp out/mismatchFraction-min01Perc.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig1.pdf
cp out/mitCrisporSensDiff.tsv ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile1.tsv
# suppFig3 is rendered at 300 dpi, the other figures at 200 dpi
convert -density 300 ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig3.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig3.png
convert -density 200 ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig1.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig1.png
#cp out/gcOtCount.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig1.pdf
cp out/heatMap.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig5.pdf
convert -density 200 ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig1.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig1.png
cp out/venn.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig2.pdf
convert -density 200 ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig2.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig2.png
convert -density 200 ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig4.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig4.png
cp out/precRecall.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFig5.pdf
cp out/compEffScores-valid.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/suppFile3.pdf
# fig1.pdf is copied again from the same source as at the top of this group
cp out/mismatchFraction-min01Perc.pdf ~/Dropbox/articles/crispor/crisporArticleSupplFiles/fig1.pdf
# From the schoenig score table keep tab-separated columns 2, 3, 7 and 21, then
# print the 1st, 3rd and 4th whitespace-separated fields of what is left; the
# 3rd is reduced to the 24 characters starting at position 31.
# (presumably these map to original columns 2, 7 and 21 - holds only if the
# kept columns contain no embedded whitespace)
cut -f2,3,7,21 effData/schoenig.scores.tab \
  | awk '{print $1, substr($3,31,24), $4}'