数据过滤

for i in HEC2  S185  S186 WT1 WT2 WT3;do
	fastp -i ${i}.R1.fq.gz -I ${i}.R2.fq.gz -o  ${i}out.R1.fq.gz -O  ${i}out.R2.fq.gz

for i in HEC2  S185  S186 WT1 WT2 WT3;do
        {
        cores=96
        ref="../../pp_index"
        bowtie2 --end-to-end --very-sensitive --no-mixed \\
        --no-discordant --phred33 -I 10 -X 700 -p ${cores} \\
        -x ${ref} -1 ${i}_1.filtered.fq.gz -2 ${i}_2.filtered.fq.gz \\
        -S ${i}_bowtie2.sam &> ${i}_bowtie2.txt
        samtools view -bS -F 0x04 ${i}_bowtie2.sam > ${i}_bowtie2.mapped.bam
        bedtools bamtobed -i ${i}_bowtie2.mapped.bam -bedpe > ${i}_bowtie2.bed
        awk '$1==$4 && $6-$2 < 1000 {print $0}' ${i}_bowtie2.bed > ${i}_bowtie2.clean.bed
        cut -f 1,2,6 ${i}_bowtie2.clean.bed | sort -k1,1 -k2,2n -k3,3n  > ${i}_bowtie2.fragments.bed
        binLen=500
        awk -v w=$binLen '{print $1, int(($2 + $3)/(2*w))*w + w/2}' ${i}_bowtie2.fragments.bed |\\
        sort -k1,1V -k2,2n |\\
        uniq -c |\\
        awk -v OFS="\\t" '{print $2, $3, $1}' |\\
        sort -k1,1V -k2,2n  >${i}_bowtie2.fragmentsCount.bin$binLen.bed
        }
done

for i in DRR151583  DRR151584;do
        {
        cores=96
        ref="../../p.patens"
        bwa aln ${ref} ${i}.fastq.gz  > ${i}_1.sai
        bwa aln ${ref} ${i}.fastq.gz  > ${i}_2.sai        bwa sampe ${ref} ${i}_1.sai ${i}_2.sai ${i}.fastq.gz ${i}.fastq.gz > ${i}_bwa.sam
        samtools view -bS ${i}_bwa.sam > ${i}_bwa.bam
        samtools sort -o ${i}_bwa.sorted_bam ${i}_bwa.bam
        bedtools bamtobed -i ${i}_bwa.bam -bedpe > ${i}_bwa.bed
        awk '$1==$4 && $6-$2 < 1000 {print $0}' ${i}_bwa.bed > ${i}_bwa.clean.bed
        cut -f 1,2,6 ${i}_bwa.clean.bed | sort -k1,1 -k2,2n -k3,3n  > ${i}_bwa.fragments.bed
        binLen=500
        awk -v w=$binLen '{print $1, int(($2 + $3)/(2*w))*w + w/2}' ${i}_bwa.fragments.bed |\\
        sort -k1,1V -k2,2n |\\
        uniq -c |\\
        awk -v OFS="\\t" '{print $2, $3, $1}' |\\
        sort -k1,1V -k2,2n  >${i}_bwa.fragmentsCount.bin$binLen.bed
        samtools flagstat ${i}_bwa.bam > ${i}_flagstat.txt
        samtools index ${i}_bwa.sorted_bam
        bamCoverage -b ${i}_bwa.sorted_bam -o ${i}.bw --normalizeUsing CPM --binSize 10

        }

done

为蓖麻基因组构建索引

bwa index -p castor -a is castor-fah12.fa

**寻找输入序列的后缀数组位点**   
bwa aln /home/disk2/wangwenbo/genome/castor /home/disk2/wangwenbo
/Dap-seq/p-RcABI3_L2_1002087.R1-1.fastq.gz > p-RcABI3_L2_1002087.R1-1.sai

bwa aln /home/disk2/wangwenbo/genome/castor /home/disk2/wangwenbo
/Dap-seq/p-RcABI3_L2_1002087.R2-1.fastq.gz > p-RcABI3_L2_1002087.R2-1.sai 

生成sam格式的序列比对文件

bwa sampe /home/disk2/wangwenbo/genome/castor p-RcABI3_L2_1002087.R1-1.sai p-RcABI3_L2_1002087.R2-1.sai  /home/disk2/wangwenbo/Dap-seq/p-RcABI3_L2_1002087.R1-1.fastq.gz  /home/disk2/wangwenbo/Dap-seq/p-RcABI3_L2_1002087.R2-1.fastq.gz > p-RcABI3_L2_1002087-1.sam(合并一个)

sam文件转bam文件

samtools view -bS p-RcABI3_L2_1002087-1.sam > p-RcABI3_L2_1002087-1.bam
samtools sort -o p-RcABI3_L2_1002087-1.sorted.bam p-RcABI3_L2_1002087-1.bam

合并

samtools merge p-RcABI3_L2_1002087-1-merge.sorted.bam p-RcABI3_L2_1002087-1.sorted.bam p-RcABI3_L2_1002087-2.sorted.bam