单个物种建立(以 taxid 5199 的 Cladonia species 为例):
# Single-species setup, run from the directory that contains the `test`
# JBrowse template; 5199 is the directory for this species.
# -p keeps the command from failing when the directory already exists.
mkdir -p 5199
# Copy instead of move so `test` is not emptied and can be reused for the
# next species (NOTE(review): assumes `test` is the shared template - confirm).
cp -r test/* 5199/
# --out makes the CLI write config.json and the copied data into 5199/
# instead of the current directory.
jbrowse add-assembly ~/genomic/as/BAAIEH01.1.fsa_nt.fna.gz --name "Cladonia-species" --load copy --out 5199/
jbrowse add-track ~/gff/BAAIEH01.1.fsa_nt.gff3 --assemblyNames "Cladonia-species" --load copy --name 'Cladonia-species' --out 5199/
批量建立脚本
先对每个注释文件(GFF3)排序、bgzip 压缩并建立 tabix 索引,再对每个基因组文件(FASTA)bgzip 压缩并建立 faidx 索引:
# Sort, bgzip-compress and tabix-index every GFF3 file in the current
# directory. For X.gff3 the result is X.gff3.sorted.gff3.gz plus its index.
# (The stray word `ls` was removed from the list: it was iterated as a file
# name and produced a junk "ls.sorted.gff3".)
for gff_file in *gff3; do
  # An unmatched glob stays literal; skip it rather than sorting "*gff3".
  [[ -e "$gff_file" ]] || continue
  sorted_file="${gff_file}.sorted.gff3"
  # GFF3 columns are tab-separated: order by column 1, then numerically by
  # column 4, which is what tabix needs for position-sorted input.
  if sort -t $'\t' -k1,1 -k4,4n -- "$gff_file" > "$sorted_file"; then
    bgzip "$sorted_file"
    tabix -p gff "${sorted_file}.gz"
  else
    # Do not compress/index a partial file left behind by a failed sort.
    printf 'sort failed for %s; skipping\n' "$gff_file" >&2
    rm -f -- "$sorted_file"
  fi
done
# bgzip-compress every FASTA file (*fna) in the current directory, then
# build a samtools faidx index on the compressed file.
for fasta_file in *fna; do
  # An unmatched glob stays literal; skip it rather than running on "*fna".
  [[ -e "$fasta_file" ]] || continue
  # Quoted so file names containing spaces or glob characters stay intact.
  bgzip "$fasta_file"
  samtools faidx "${fasta_file}.gz"
done
#!/usr/bin/env bash
#
# Batch-register species in JBrowse 2.
# For every data row of SPECIES_TSV this adds the assembly FASTA and the
# sorted, indexed GFF3 gene track to a per-taxid directory under
# JBROWSE_DATA_DIR.
#
# Inputs looked up per row:
#   FASTA_DIR/<GenBank_accession>*.fna.gz
#   GFF_DIR/<GenBank_accession>*sorted.gff3.gz
# Rows whose files are missing are reported and skipped.
set -euo pipefail
# With nullglob an unmatched pattern expands to nothing, so "no such file"
# is simply an empty array instead of a failing `ls` under pipefail.
shopt -s nullglob
############################
# Configurable paths
############################
SPECIES_TSV=/home/ubuntu/lichen/jbrowse/data/spe.tsv
TEMPLATE_DIR=/home/ubuntu/lichen/jbrowse/data/test
JBROWSE_DATA_DIR=/home/ubuntu/lichen/jbrowse/data/
FASTA_DIR=/home/ubuntu/genomic/as
GFF_DIR=/home/ubuntu/gff

# Fail early with a clear message instead of one warning per species.
if ! command -v jbrowse > /dev/null; then
  echo "jbrowse CLI not found in PATH" >&2
  exit 1
fi

############################
# Skip the header line, then read one species per row
############################
# NOTE: tab counts as IFS whitespace, so `read` collapses consecutive tabs;
# every column must be non-empty (use a placeholder such as NA).
tail -n +2 "$SPECIES_TSV" | while IFS=$'\t' read -r \
  species BioProject_ID taxid Organism_name Assembly_level Sequencing_technology \
  total_length contig_N50 gc_perc GenBank_accession
do
  # Strip a trailing CR so a TSV saved with Windows line endings still works.
  GenBank_accession=${GenBank_accession%$'\r'}

  # Blank line: nothing to do.
  [[ -n "$species" ]] || continue
  if [[ -z "$taxid" || -z "$GenBank_accession" ]]; then
    echo "WARN: incomplete row for $species, skipping" >&2
    continue
  fi

  echo "==> Processing $species ($GenBank_accession)"

  ##################################
  # 1. One directory per species, named after the taxid
  ##################################
  # ${VAR%/} drops the trailing slash of JBROWSE_DATA_DIR to avoid "//".
  workdir="${JBROWSE_DATA_DIR%/}/${taxid}"
  # Needed so that pushd below cannot fail for a new taxid.
  mkdir -p "$workdir"

  ##################################
  # 2. Copy the JBrowse template (first time only) - left disabled
  ##################################
  #if [[ ! -f "$workdir/config.json" ]]; then
  # cp -r "$TEMPLATE_DIR"/* "$workdir/"
  #fi

  ##################################
  # 3. Locate the FASTA (first match in glob order)
  ##################################
  fasta_matches=("$FASTA_DIR/$GenBank_accession"*.fna.gz)
  if (( ${#fasta_matches[@]} == 0 )); then
    echo "❌ FASTA not found for $GenBank_accession"
    continue
  fi
  fasta=${fasta_matches[0]}

  ##################################
  # 4. Locate the GFF (first match in glob order)
  ##################################
  gff_matches=("$GFF_DIR/$GenBank_accession"*sorted.gff3.gz)
  if (( ${#gff_matches[@]} == 0 )); then
    echo "❌ GFF not found for $GenBank_accession"
    continue
  fi
  gff=${gff_matches[0]}

  ##################################
  # 5. Enter the species directory
  ##################################
  pushd "$workdir" > /dev/null

  ##################################
  # 6. Add the assembly (named after the species column;
  #    Organism_name is read from the table but not used)
  ##################################
  # stdin comes from /dev/null so the CLI cannot swallow the remaining TSV
  # rows queued on the loop's stdin. A failure is reported but does not stop
  # the batch (the original used `|| true`; presumably so re-runs survive an
  # already-registered assembly - confirm).
  if ! jbrowse_output=$(jbrowse add-assembly "$fasta" \
    --name "$species" \
    --load copy \
    < /dev/null 2>&1); then
    printf 'WARN: add-assembly failed for %s:\n%s\n' "$species" "$jbrowse_output" >&2
  fi

  ##################################
  # 7. Add the gene annotation track
  ##################################
  if ! jbrowse_output=$(jbrowse add-track "$gff" \
    --assemblyNames "$species" \
    --load copy \
    --name "Genes" \
    < /dev/null 2>&1); then
    printf 'WARN: add-track failed for %s:\n%s\n' "$species" "$jbrowse_output" >&2
  fi

  ##################################
  # 8. Return to the previous directory
  ##################################
  popd > /dev/null
  echo "✅ Done: $species"
  echo
done
上面的批量脚本需要一个信息表格 spe.tsv:以制表符(Tab)分隔,第一行为表头(脚本会跳过),每列都不能为空;列顺序如下(此处以 Markdown 表格形式展示):
| species | BioProject_ID | taxid | Organism_name | Assembly_level | Sequencing_technology | total_length(Mb) | contig_N50(Mb) | gc_perc | GenBank_accession |
|---|---|---|---|---|---|---|---|---|---|
| Cladonia species | PRJDB20668 | 5199 | Cladonia species | Scaffold | Illumina Novaseq X | 31.99 | 0.144 | 47.77 | NA |
| Usnea hakonensis | PRJDB19959 | 362614 | Usnea hakonensis (ascomycete fungi) | Scaffold | Illumina Hiseq 2500 | 41.198 | 0.147 | 45.5 | GCA_013423325.1 |
| Lichina confinis | PRJEB93766 | 174425 | Lichina confinis (ascomycete fungi) | Contig | Illumina HiSeq | 25.172 | 0.078 | 55 | GCA_026054655.1 |
| Ionaspis lacustris | PRJEB88318 | 196135 | Ionaspis lacustris (ascomycete fungi) | Scaffold | Illumina HiSeq 2000 | 18.792 | 0.01 | 49.5 | GCA_964257415.1 |
| Flavoplaca marina | PRJEB87567 | 1301332 | Flavoplaca marina (ascomycete fungi) | Chromosome | PacBio,Arima2 | 36.272 | 0.154 | 43 | GCA_965225635.1 |
| Verrucaria ditmarsica | PRJEB86198 | 50943 | Cladonia portentosa (ascomycete fungi) | Chromosome | PacBio,Arima2 | 35.108 | 1.431 | 45 | GCA_965153415.1 |
| Lichina confinis | PRJEB85674 | 1217787 | Lecanora helva (ascomycete fungi) | Contig | PacBio | 30.071 | 2.684 | 47 | GCA_050886345.1 |
| Cladonia portentosa | PRJEB85672 | 50926 | Cladonia diversa (ascomycete fungi) | Scaffold | PacBio,Arima2 | 48.119 | 0.113 | 47 | GCA_964605305.1 |