这个流程主要是将上游Smart-seq2 的 fastq 数据转换成单细胞表达矩阵
# Global variable
# Tips:
# This pipline is use to get the scRNA-seq expression Matrix from Smart-seq2 fastq files.
#
# Date: 2021-10-04 10:10:11
# Best Regards,
# Yuan.Sh (Major in Bioinfomatics & Deep-learning)
# ---------------------------------------
# School of Basic Medical Sciences,
# Fujian Medical University,
# Fuzhou, Fujian, China.
# please contact with me via the following ways:
# (a) e-mail :yuansh3354@163.com
# (b) QQ : 1044532817
#########################################################
### Step 00: Set working directory
wkd='/media/yuansh/14THHD/HCC_CTC'
processfile=$wkd/processfile
rawdata=$wkd/rawdata
hista2_index=/media/yuansh/1THHD/Yuansh_Share/bioinformatics/reference/index/hisat/hg38/genome
gft=/media/yuansh/1THHD/Yuansh_Share/bioinformatics/reference/genome/hg38/Homo_sapiens.GRCh38.104.gtf.gz
Matrix_name=HCC_CTC
#########################################################
start=$(date +%s)
echo "Script starts executing ..."
echo "Start Time:"`date`
echo "The main working directory is" $wkd
#########################################################
# Main program
#########################################################
### Step 01: First FastQC
echo "First fastqc is processing ..."
echo "First fastqc starts at `date`"
conda activate RNA-seq
fastq=${rawdata}/fastq
fastq_qc=${processfile}/fastq_qc
if [ ! -d $fastq_qc ];then
mkdir -p $fastq_qc
fastqc -o $fastq_qc -t 28 ${fastq}/*.fq.gz
multiqc $fastq_qc/ -o ${processfile}/multiqc
fi
echo "First fastqc ends at `date`"
conda deactivate
### Step 02: Trimmomatic
echo "Trimmomatic is processing ..."
echo "Trimmomatic starts at `date`"
conda activate RNA-seq
ls ${fastq}/*_1.(fq|fastq).gz >${processfile}/1
ls ${fastq}/*_2.(fq|fastq).gz >${processfile}/2
paste -d' ' ${processfile}/1 ${processfile}/2 > ${processfile}/config
fastq_clean=${processfile}/fastq_clean
if [ ! -d $fastq_clean ];then
mkdir -p $fastq_clean
fi
echo "cat ${processfile}/config |while read id
do
fq1=\`echo \$id|cut -d' ' -f1\`
fq2=\`echo \$id|cut -d' ' -f2\`
trim_galore -q 25 --core 12 --phred33 --length 36 -e 0.1 --stringency 3 --paired -o $fastq_clean \$fq1 \$fq2
done" > ${wkd}/code/trim.sh
zsh ${wkd}/code/trim.sh
conda deactivate
echo "Trimmomatic ends at `date`"
### Step 02.01: Trimmomatic_qc
echo "Trimmomatic fastqc is processing ..."
echo "Trimmomatic fastqc starts at `date`"
conda activate RNA-seq
fastq_trim_qc=${processfile}/fastq_trim_qc
if [ ! -d $fastq_trim_qc ];then
mkdir -p $fastq_trim_qc
fastqc -o $fastq_trim_qc -t 32 ${fastq_clean}/*.fq.gz
multiqc $fastq_trim_qc/ -o ${processfile}/multiqc_trim
fi
conda deactivate
echo "Trimmomatic fastqc ends at `date`"
### Step 03: Hista2
echo "Hista2 is processing ..."
echo "Hista2 starts at `date`"
conda activate RNA-seq
ls ${fastq_clean}/*_1.(fq|fastq).gz >${processfile}/1
ls ${fastq_clean}/*_2.(fq|fastq).gz >${processfile}/2
paste -d' ' ${processfile}/1 ${processfile}/2 > ${processfile}/hista2_config
sam=${processfile}/sam
if [ ! -d $sam ];then
mkdir -p $sam
fi
echo "cat ${processfile}/hista2_config |while read id
do
fq1=\`echo \$id|cut -d' ' -f1\`
fq2=\`echo \$id|cut -d' ' -f2\`
sample_name=\`basename \$fq1 _1_val_1.fq.gz\`
hisat2 -p 16 -x $hista2_index -1 \${fq1} -2 \${fq2} -S $sam/\${sample_name}.hisat.sam;
done" > ${wkd}/code/run_hista2.sh
zsh ${wkd}/code/run_hista2.sh
echo "Hista2 ends at `date`"
conda deactivate
### Step 04: sam to bam
echo "Sam to Bam is processing ..."
echo "Sam to Bam starts at `date`"
conda activate RNA-seq
bam=${processfile}/bam
if [ ! -d $bam ];then
mkdir -p $bam
fi
ls $sam/*.sam|while read id ;do (samtools sort -O bam -@ 5 -o $bam/$(basename ${id} ".sam").bam ${id});done
ls $bam/*.bam |xargs -i samtools index {}
echo "Sam to Bam ends at `date`"
conda deactivate
### Step 05: counts Matrix
echo "featureCounts is processing ..."
echo "featureCounts starts at `date`"
conda activate RNA-seq
featureCounts -T 28 \
-p \
-t exon \
-g gene_id \
-a $gtf \
-o ${Matrix_name}.txt $bam/*.bam \
1>counts.id.log 2>&1
echo "featureCounts ends at `date`"
conda deactivate
#########################################################
conda deactivate
#########################################################
end=$(date +%s)
take=$(( end - start ))
h=$(( take / 3600 ))
m=$((( take - 3600 * h ) / 60 ))
s=$((( take - 3600 * h - 60 * m )))
echo Script execution completed, Time: ${h} hours ${m} minutes ${s} seconds
echo "Script execution completed:"`date`
#########################################################