数据过滤
for i in HEC2 S185 S186 WT1 WT2 WT3;do
fastp -i ${i}.R1.fq.gz -I ${i}.R2.fq.gz -o ${i}out.R1.fq.gz -O ${i}out.R2.fq.gz
for i in HEC2 S185 S186 WT1 WT2 WT3;do
{
cores=96
ref="../../pp_index"
bowtie2 --end-to-end --very-sensitive --no-mixed \\
--no-discordant --phred33 -I 10 -X 700 -p ${cores} \\
-x ${ref} -1 ${i}_1.filtered.fq.gz -2 ${i}_2.filtered.fq.gz \\
-S ${i}_bowtie2.sam &> ${i}_bowtie2.txt
samtools view -bS -F 0x04 ${i}_bowtie2.sam > ${i}_bowtie2.mapped.bam
bedtools bamtobed -i ${i}_bowtie2.mapped.bam -bedpe > ${i}_bowtie2.bed
awk '$1==$4 && $6-$2 < 1000 {print $0}' ${i}_bowtie2.bed > ${i}_bowtie2.clean.bed
cut -f 1,2,6 ${i}_bowtie2.clean.bed | sort -k1,1 -k2,2n -k3,3n > ${i}_bowtie2.fragments.bed
binLen=500
awk -v w=$binLen '{print $1, int(($2 + $3)/(2*w))*w + w/2}' ${i}_bowtie2.fragments.bed |\\
sort -k1,1V -k2,2n |\\
uniq -c |\\
awk -v OFS="\\t" '{print $2, $3, $1}' |\\
sort -k1,1V -k2,2n >${i}_bowtie2.fragmentsCount.bin$binLen.bed
}
done
for i in DRR151583 DRR151584;do
{
cores=96
ref="../../p.patens"
bwa aln ${ref} ${i}.fastq.gz > ${i}_1.sai
bwa aln ${ref} ${i}.fastq.gz > ${i}_2.sai bwa sampe ${ref} ${i}_1.sai ${i}_2.sai ${i}.fastq.gz ${i}.fastq.gz > ${i}_bwa.sam
samtools view -bS ${i}_bwa.sam > ${i}_bwa.bam
samtools sort -o ${i}_bwa.sorted_bam ${i}_bwa.bam
bedtools bamtobed -i ${i}_bwa.bam -bedpe > ${i}_bwa.bed
awk '$1==$4 && $6-$2 < 1000 {print $0}' ${i}_bwa.bed > ${i}_bwa.clean.bed
cut -f 1,2,6 ${i}_bwa.clean.bed | sort -k1,1 -k2,2n -k3,3n > ${i}_bwa.fragments.bed
binLen=500
awk -v w=$binLen '{print $1, int(($2 + $3)/(2*w))*w + w/2}' ${i}_bwa.fragments.bed |\\
sort -k1,1V -k2,2n |\\
uniq -c |\\
awk -v OFS="\\t" '{print $2, $3, $1}' |\\
sort -k1,1V -k2,2n >${i}_bwa.fragmentsCount.bin$binLen.bed
samtools flagstat ${i}_bwa.bam > ${i}_flagstat.txt
samtools index ${i}_bwa.sorted_bam
bamCoverage -b ${i}_bwa.sorted_bam -o ${i}.bw --normalizeUsing CPM --binSize 10
}
done
为蓖麻基因组构建索引
bwa index -p castor -a is castor-fah12.fa
**寻找输入序列的后缀数组位点**
bwa aln /home/disk2/wangwenbo/genome/castor /home/disk2/wangwenbo
/Dap-seq/p-RcABI3_L2_1002087.R1-1.fastq.gz > p-RcABI3_L2_1002087.R1-1.sai
bwa aln /home/disk2/wangwenbo/genome/castor /home/disk2/wangwenbo
/Dap-seq/p-RcABI3_L2_1002087.R2-1.fastq.gz > p-RcABI3_L2_1002087.R2-1.sai
生成sam格式的序列比对文件
bwa sampe /home/disk2/wangwenbo/genome/castor p-RcABI3_L2_1002087.R1-1.sai p-RcABI3_L2_1002087.R2-1.sai /home/disk2/wangwenbo/Dap-seq/p-RcABI3_L2_1002087.R1-1.fastq.gz /home/disk2/wangwenbo/Dap-seq/p-RcABI3_L2_1002087.R2-1.fastq.gz > p-RcABI3_L2_1002087-1.sam(合并一个)
sam文件转bam文件
samtools view -bS p-RcABI3_L2_1002087-1.sam > p-RcABI3_L2_1002087-1.bam
samtools sort -o p-RcABI3_L2_1002087-1.sorted.bam p-RcABI3_L2_1002087-1.bam
合并
samtools merge p-RcABI3_L2_1002087-1-merge.sorted.bam p-RcABI3_L2_1002087-1.sorted.bam p-RcABI3_L2_1002087-2.sorted.bam