-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunRNAseq-Pi_BOWTIE.sh
More file actions
88 lines (66 loc) · 3.31 KB
/
runRNAseq-Pi_BOWTIE.sh
File metadata and controls
88 lines (66 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# runRNaseq-Pi.sh
# Example of RNAseq-Pi script/modules filter functions, all this functions will
# be controlled in the real pipeline.
# Copyright (C) 2011 Juan Caballero [Institute for Systems Biology]
# add our Perl scripts in your PATH (check read/exec permisions first)
export PATH=$PATH:/proj/hoodlab/share/programs/RNAseq-Pi/bin
# NPROC -> number of CPUs to use
NPROC=4
# Compressor to use
ZIP=gzip
# Location of the indexes
BWINDEX=/proj/hoodlab/share/programs/indexes
# Bowtie
export PATH=$PATH:/path/to/bowtie
BWPARAM="--quiet -p $NPROC -S --sam-nohead -k 1 -v 2 -q"
# MapSplice
MAPSPLICE=/proj/hoodlab/share/programs/mapsplice/bin/mapsplice_segments.py
GENOME=hs37.61
GENOMEDIR=/proj/hoodlab/share/programs/Ensembl
READSIZE=75
MSPARAM="-c $GENOMEDIR/$GENOME.fa -B $BWINDEX/$GENOME -t $GENOMEDIR/$GENOME.pseudo.gtf -w $READSIZE -L 25 -Q fq -X $NPROC --full-running --semi-canonical"
# Cufflinks
export PATH=$PATH:/proj/hoodlab/share/programs/cufflinks
CFPARAM="-r $GENOMEDIR/$GENOME.gtf -s $GENOMEDIR/$GENOME.fa"
for FQ in *.fq # Process all the FASTQ files in the current directory
do
ID=${FQ%fq}
# cascade mode, if you need to change step order, must change outputs/inputs
# step1 -> filter low quality reads
filterQuality.pl -i $FQ -o $ID.qual_OK.fq -b $ID.bowtie_qual_BAD.fq
# step2 -> remove vector matches
bowtie $BWPARAM --un $ID.vector_OK.fq $BWINDEX/UniVec_Core $ID.qual_OK.fq | perl -lane 'print unless($F[1] == 4)' > $ID.bowtie_vector_BAD.sam
# step3 -> filter low complexity reads
filterLowComplex.pl -i $ID.vector_OK.fq -o $ID.complex_OK.fq -b $ID.bowtie_complex_BAD.fq
# step4 -> remove rRNA/MT matches
bowtie $BWPARAM --un $ID.riboMT_OK.fq $BWINDEX/human.GRCh37.61.rRNA-MT $ID.complex_OK.fq | perl -lane 'print unless($F[1] == 4)' > $ID.bowtie_riboMT_BAD.sam
# step5 -> remove repeats matches
bowtie $BWPARAM --un $ID.repeat_OK.fq $BWINDEX/human_RepBase15.10 $ID.riboMT_OK.fq | perl -lane 'print unless($F[1] == 4)' > $ID.bowtie_repeat_BAD.sam
# step6 -> remove ERCC matches
bowtie $BWPARAM --un $ID.ercc_OK.fq $BWINDEX/ERCC_reference_081215 $ID.repeat_OK.fq | perl -lane 'print unless($F[1] == 4)' > $ID.bowtie_ercc_BAD.sam
# collect the filtered reads
mv $ID.ercc_OK.fq $ID.bowtie_FILTERED.fq
# compress temporary files
for FILE in *BAD.fq *BAD.sam
do
$ZIP $FILE
done
# remove temporary files
rm *_OK.fq
# step7 -> alignment with MapSplice
python $MAPSPLICE $MSPARAM -u $ID.bowtie_FILTERED.fq -o $ID.mapsplice
grep chr $ID.mapsplice/alignments.sam | perl -lane '$F[1] == 16 ? print "$_\tXS:A:-" : print "$_\tXS:A:+"' > $ID.mapsplice.sam
# if we ran using fusion detection, uncomment the next line to keep the file
#mv $ID.mapsplice/fusion_junction $ID.mapsplice.fusion_junction
rm -rf $ID.mapsplice
# step8 -> Cufflink assembly and transcript quantification
sort -k 3,3 -k 4,4n $ID.mapsplice.sam > $ID.mapsplice.sorted.sam
rm $ID.mapsplice.sam
mv $ID.mapsplice.sorted.sam $ID.mapsplice.sam
cufflinks -q -p $NPROC -o $ID.cufflinks $ID.mapsplice.sam
cuffcompare $CFPARAM -o $ID.cuff $ID.cufflinks/transcripts.gtf
# transcript annotation and quantification is in $ID.cufflinks/transcripts.tmap
$ZIP $ID.mapsplice.sam
# END
done