# BUSCO (older release): genome-mode completeness assessment of
# lh_green.FINAL.fasta against the firmicutes_odb10 lineage dataset,
# using 12 threads and an e-value cutoff of 1e-05.
busco \
  -i lh_green.FINAL.fasta \
  -o busco \
  -l /home/my/database/busco/firmicutes_odb10 \
  -m genome \
  -c 12 \
  -e 1e-05
# Plotting step (working directory run_busco/), followed by the R figure script.
busco_plot -wd run_busco/
Rscript busco_figure.R
##物种数据库
# https://busco-data.ezlab.org/v4/data/lineages/
## Newer BUSCO release
conda activate hifi
# Genome-mode assessment of the masked Hi-C assembly against eukaryota_odb10
# (12 threads, e-value cutoff 1e-05); results go to ./hic.
busco \
  -i ~/rna-seq/hic-final-green.fasta.masked \
  -o hic \
  -l ./eukaryota_odb10 \
  -m genome \
  -c 12 \
  -e 1e-05
# Plot the results found in the ./hic working directory.
generate_plot.py -wd ./hic
#简单设置如下,其它参数暂且使用默认值
#-i,输入组装基因组草图(FASTA 文件),例如“Bacillus_subtilis.scaffolds.fasta”
#-o,结果输出路径
#-l,单拷贝基因集(例如“firmicutes_odb10”)的存放路径
#-m,运行基因组评估模式(genome)
#-c,程序运行线程数(上文命令中设为 12)以提升运行速率
#-e,序列比对 e 值的阈值设为 1e-5
# QUAST: assembly statistics for out.fa, written to test1_out/ with 10 threads.
conda activate python37
quast.py --threads 10 --output-dir test1_out out.fa
# 主要结果为 report.html
# Merqury on short reads: choose k, build one k-mer database per read file,
# merge the databases, then run Merqury on the assembly.
conda activate merqury

# Determine the best k-mer size. The genome size is passed as a plain integer
# (bp); thousands separators such as "102,332,613" are not a valid number for
# the arithmetic done by best_k.sh.
sh "$MERQURY/best_k.sh" 102332613

# Count k-mers (k=19) for each of the two cleaned read files.
for i in {1..2}; do
  meryl k=19 count output "read${i}.meryl" "merged.clean${i}.fq"
done

# Merge the per-file databases. The inputs are listed explicitly because the
# glob read*.meryl would also match the output read.meryl once it exists
# (e.g. when this step is re-run).
meryl union-sum output read.meryl read1.meryl read2.meryl

# Run Merqury with the merged database and the assembly; the last argument
# ("profix", kept as written) is presumably the output prefix.
merqury.sh read.meryl genome_file.fa profix
## Third-generation (long-read) sequencing data
# Build the k-mer database (k=18) from the long-read FASTQ file.
meryl k=18 count output read.meryl \
  ../20211018-NPL3724-P6-PAJ02353.pass.fastq
# Evaluate lh.fa using merqury.sh from the local merqury-master directory.
./merqury-master/merqury.sh read.meryl lh.fa output
# Mapping statistics, short reads: index the assembly, align the two read
# files with bwa mem, convert the SAM stream to BAM, then print flagstat.
bwa index assembly.fa
bwa mem -t 16 assembly.fa reads_R1.fq reads_R2.fq \
  | samtools view -b -S - > aln.bam
samtools flagstat aln.bam

# Mapping statistics, long reads: minimap2 with the map-pb preset, SAM output
# requested with -a.
# NOTE(review): this writes aln.bam again, replacing the short-read BAM.
minimap2 -a -x map-pb -t 16 assembly.fa longreads.fq \
  | samtools view -b -S - > aln.bam
samtools flagstat aln.bam
# LAI workflow: index the genome with GenomeTools, detect LTR candidates with
# ltrharvest, then pass them to LTR_retriever.

# Build the index named "green". Continued lines must end in a single
# backslash: a doubled "\\" is a literal backslash argument followed by the
# end of the command, so each option line would run as its own command.
gt suffixerator \
  -db ~/backup/green/hic-final-green.fasta \
  -indexname green \
  -tis -suf -lcp -des -ssp -sds -dna

# Detect LTR candidates on the "green" index; results go to green.harvest.scn.
gt ltrharvest \
  -index green \
  -similar 90 -vic 10 -seed 20 -seqids yes \
  -minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 \
  -motif TGCA -motifmis 1 > green.harvest.scn

# Run LTR_retriever on the same genome with the ltrharvest candidates.
LTR_retriever -genome ~/backup/green/hic-final-green.fasta -inharvest green.harvest.scn

# Show the first lines of the LAI report.
# NOTE(review): the file name refers to lh.fa, while the run above uses
# hic-final-green.fasta; the report for that run is presumably named after
# that genome instead — confirm before relying on this path.
head lh.fa.out.LAI
# 第二行最后一列即为总基因组的 LAI 值。
# 评价标准:
# | LAI            | category  |
# |----------------|-----------|
# | 0 < LAI < 10   | draft     |
# | 10 <= LAI < 20 | reference |
# | LAI >= 20      | Gold      |