Smart-seq2 转 Count 矩阵

这个流程主要是将上游Smart-seq2 的 fastq 数据转换成单细胞表达矩阵

# Global variable
# Tips:
#      This pipline is use to get the scRNA-seq expression Matrix from Smart-seq2 fastq files.
#      
# Date: 2021-10-04 10:10:11
# Best Regards,  
# Yuan.Sh (Major in Bioinfomatics & Deep-learning)
# ---------------------------------------
# School of Basic Medical Sciences,  
# Fujian Medical University,  
# Fuzhou, Fujian, China.  
# please contact with me via the following ways:  
# (a) e-mail :yuansh3354@163.com 
# (b) QQ : 1044532817
#########################################################

### Step 00: Set working directory 
wkd='/media/yuansh/14THHD/HCC_CTC'
processfile=$wkd/processfile
rawdata=$wkd/rawdata
hista2_index=/media/yuansh/1THHD/Yuansh_Share/bioinformatics/reference/index/hisat/hg38/genome
gft=/media/yuansh/1THHD/Yuansh_Share/bioinformatics/reference/genome/hg38/Homo_sapiens.GRCh38.104.gtf.gz
Matrix_name=HCC_CTC
#########################################################

start=$(date +%s)
echo "Script starts executing ..."
echo "Start Time:"`date` 
echo "The main working directory is" $wkd

#########################################################
# Main program
#########################################################

### Step 01: First FastQC 

echo "First fastqc is processing ..."
echo "First fastqc starts at `date`"
conda activate RNA-seq

fastq=${rawdata}/fastq
fastq_qc=${processfile}/fastq_qc
if [ ! -d $fastq_qc ];then
   mkdir -p $fastq_qc
   fastqc -o $fastq_qc -t 28 ${fastq}/*.fq.gz
   multiqc $fastq_qc/ -o ${processfile}/multiqc
fi
echo "First fastqc ends at `date`"
conda deactivate 

### Step 02: Trimmomatic

echo "Trimmomatic is processing ..."
echo "Trimmomatic starts at `date`"
conda activate RNA-seq

ls ${fastq}/*_1.(fq|fastq).gz >${processfile}/1
ls ${fastq}/*_2.(fq|fastq).gz >${processfile}/2
paste -d' ' ${processfile}/1 ${processfile}/2 > ${processfile}/config

fastq_clean=${processfile}/fastq_clean
if [ ! -d $fastq_clean ];then
   mkdir -p $fastq_clean
fi

echo "cat ${processfile}/config |while read id
do
        
        fq1=\`echo  \$id|cut -d' ' -f1\`
        fq2=\`echo  \$id|cut -d' ' -f2\`
trim_galore -q 25 --core 12 --phred33 --length 36 -e 0.1 --stringency 3 --paired -o $fastq_clean  \$fq1 \$fq2 
done" >  ${wkd}/code/trim.sh

zsh ${wkd}/code/trim.sh 

conda deactivate 
echo "Trimmomatic ends at `date`"

### Step 02.01: Trimmomatic_qc

echo "Trimmomatic fastqc is processing ..."
echo "Trimmomatic fastqc starts at `date`"
conda activate RNA-seq

fastq_trim_qc=${processfile}/fastq_trim_qc
if [ ! -d $fastq_trim_qc ];then
   mkdir -p $fastq_trim_qc
   fastqc -o $fastq_trim_qc -t 32 ${fastq_clean}/*.fq.gz
   multiqc $fastq_trim_qc/ -o ${processfile}/multiqc_trim
fi

conda deactivate 
echo "Trimmomatic fastqc ends at `date`"

### Step 03: Hista2 

echo "Hista2 is processing ..."
echo "Hista2 starts at `date`"
conda activate RNA-seq

ls ${fastq_clean}/*_1.(fq|fastq).gz >${processfile}/1
ls ${fastq_clean}/*_2.(fq|fastq).gz >${processfile}/2
paste -d' ' ${processfile}/1 ${processfile}/2 > ${processfile}/hista2_config

sam=${processfile}/sam
if [ ! -d $sam ];then
   mkdir -p $sam
fi

echo "cat ${processfile}/hista2_config |while read id
do
        
        fq1=\`echo  \$id|cut -d' ' -f1\`
        fq2=\`echo  \$id|cut -d' ' -f2\`
        sample_name=\`basename \$fq1 _1_val_1.fq.gz\`
hisat2 -p 16 -x $hista2_index -1 \${fq1} -2  \${fq2} -S $sam/\${sample_name}.hisat.sam;
done" >  ${wkd}/code/run_hista2.sh

zsh ${wkd}/code/run_hista2.sh 


echo "Hista2 ends at `date`"
conda deactivate


### Step 04: sam to bam 

echo "Sam to Bam is processing ..."
echo "Sam to Bam starts at `date`"
conda activate RNA-seq

bam=${processfile}/bam
if [ ! -d $bam ];then
   mkdir -p $bam
fi

ls $sam/*.sam|while read id ;do (samtools sort -O bam -@ 5  -o $bam/$(basename ${id} ".sam").bam   ${id});done
ls $bam/*.bam |xargs -i samtools index {}


echo "Sam to Bam ends at `date`"
conda deactivate

### Step 05: counts Matrix

echo "featureCounts is processing ..."
echo "featureCounts starts at `date`"
conda activate RNA-seq

featureCounts -T 28 \
              -p \
              -t exon \
              -g gene_id \
              -a $gtf \
              -o  ${Matrix_name}.txt  $bam/*.bam \
              1>counts.id.log 2>&1 


echo "featureCounts ends at `date`"
conda deactivate


#########################################################

conda deactivate 
#########################################################
end=$(date +%s) 
take=$(( end - start ))
h=$(( take / 3600 ))
m=$((( take - 3600 * h ) / 60 ))
s=$((( take - 3600 * h - 60 * m )))
echo Script execution completed, Time: ${h} hours ${m} minutes ${s} seconds
echo "Script execution completed:"`date` 
#########################################################
上一篇:js大小写的转换 toLowerCase toUpperCase


下一篇:C. Chef Monocarp