下载 CheckM 数据库:
# Create the folder that will hold the CheckM reference data (-p: no error if
# it already exists, so the steps can be re-run).
mkdir -p MY_CHECKM_FOLDER
# Now manually download the database:
cd MY_CHECKM_FOLDER
# The URL must be a plain argument: written as <URL>, the shell parses the
# angle brackets as input/output redirections and the command fails.
wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz
# Unpack and then delete exactly the archive that was downloaded; naming the
# file (instead of *.tar.gz / *.gz) avoids touching unrelated archives if the
# folder is not empty or the cd above did not succeed.
tar -xvf checkm_data_2015_01_16.tar.gz
rm checkm_data_2015_01_16.tar.gz
cd ../
下载KRAKEN2标准数据库:
##下载kraken2
<https://github.com/DerrickWood/kraken2>
## Database: build the Kraken2 standard database into MY_KRAKEN2_DB
kraken2-build \
  --standard \
  --threads 24 \
  --db MY_KRAKEN2_DB
KRAKEN2 自定义数据库（taxonomy + bacteria 库，代替上面的标准库）:
# Build a custom Kraken2 database in ./database, step by step:
# taxonomy download, bacteria library download, then the final build.
k2_custom_db=database
mkdir "${k2_custom_db}"
kraken2-build --download-taxonomy --db "${k2_custom_db}"
kraken2-build --download-library bacteria --db "${k2_custom_db}"
kraken2-build --build --db "${k2_custom_db}"
## Run: classify the sequences in assembly.fasta against ./database,
## writing the per-taxon report to k2_report.txt and the per-sequence
## output to k2_output.txt.
# A single trailing backslash continues the command on the next line; the
# doubled "\\" seen in converted notes is a literal backslash argument and
# ends the command early.
kraken2 --db database --report k2_report.txt --report-minimizer-data \
  --output k2_output.txt /home/liuli/meta/out_lh/assembly.fasta
Downloading the NCBI_nt BLAST database:
# Download the numbered archives nt.00.tar.gz .. nt.90.tar.gz one by one.
# {00..90} relies on bash's zero-padded brace expansion.
for i in {00..90}; do
  # -c resumes a partially downloaded file. The URL is quoted and passed as a
  # plain argument (angle brackets around it would be parsed as redirections).
  # A failed part is reported and the loop moves on to the next one.
  wget -c --no-check-certificate "ftp://download.nmdc.cn/tools/meta/NCBI/nt/nt.${i}.tar.gz" \
    || echo "Failed to download nt.$i.tar.gz"
done
去除宿主基因组
## hg38.fa is the host genome; both index files are derived from its name.
host_genome=hg38.fa
host_prefix=${host_genome%.fa}
# Bitmask file for the host genome.
bmtool -d "${host_genome}" -o "${host_prefix}.bitmask"
# srprism index for the host genome.
# NOTE(review): -M is presumably the memory limit in MB; confirm with srprism help.
srprism mkindex -i "${host_genome}" -o "${host_prefix}.srprism" -M 100000
预编译的 Kraken2 数据库 (k2_pluspfp, 20230314) 下载地址: ftps://download.nmdc.cn/tools/meta/kraken2/k2_pluspfp_20230314.tar.gz
最后配置config-metawrap文件
# Paths to metaWRAP scripts (dont have to modify)
# The metawrap launcher is looked up on PATH with the shell builtin
# `command -v` (the external `which` is not standardized and may be missing);
# the script and module directories are taken to sit next to the launcher.
mw_path=$(command -v metawrap)
# Drop the last path component ("/metawrap") to get the containing directory.
bin_path=${mw_path%/*}
SOFT=${bin_path}/metawrap-scripts
PIPES=${bin_path}/metawrap-modules
# CONFIGURABLE PATHS FOR DATABASES (see 'Databases' section of metaWRAP README for details)
# path to kraken standard database
# (left unquoted on purpose so that the leading ~ expands to the home directory)
KRAKEN2_DB=~/last
# path to indexed human (or other host) genome (see metaWRAP website for guide). This includes .bitmask and .srprism files
BMTAGGER_DB=/home/liuli/liuli/liuli/meta/metaWRAP-master/human
# paths to BLAST databases
BLASTDB=/home/liuli/liuli/liuli/meta/metaWRAP-master/NCBI_nt
TAXDUMP=/home/liuli/liuli/liuli/meta/metaWRAP-master/NCBI_tax
# Run the metaWRAP read_qc module on the paired-end reads (-1 forward,
# -2 reverse) with 24 threads (-t), writing results to READ_QC (-o).
# Each continued line ends in a single backslash; the doubled "\\" from the
# converted notes would end the command after the first line.
metawrap read_qc \
  -1 /home/liuli/liuli/liuli/meta/illumiona/clean/L1EHK3003588-LH_meta1_1.fastq \
  -2 /home/liuli/liuli/liuli/meta/illumiona/clean/L1EHK3003588-LH_meta1_2.fastq \
  -t 24 -o READ_QC
# Run the metaWRAP kraken2 module with 96 threads (-t) on the scaffolds
# FASTA and the two read files produced in READ_QC, writing to output_dir (-o).
# Single trailing backslashes keep this one command; "\\" would split it.
metawrap kraken2 -t 96 -o output_dir \
  cleanSPAdes_retry/scaffolds.fasta \
  ./READ_QC/final_pure_reads_1.fastq ./READ_QC/final_pure_reads_2.fastq