1、染色体长度

2、基因密度

3、gc密度

4、重复序列

5、弦图

## 计算染色体长度
seqtk comp genome.fasta | \\
grep "^HiC_scaffold" |awk '{print $1"\\t"$2}' > genome.len  ##grep里的内容需要根据不同基因组更改
## 生成染色体文件 7列
awk '{print "chr\\t-\\t"$1"\\t"$1"\\t0\\t"$2"\\tchr"NR}' genome.len  > Sind_karyotype.txt
## 生成窗口文件, 窗口大小50Kb
bedtools makewindows -w 50000 -g genome.len > genome.window.bed
## 计算每个窗口平均GC含量
seqtk subseq genome.fasta genome.window.bed > genome.window.fasta
seqtk comp genome.window.fasta |awk '{print $1 "\\t" ($4+$5)/($3+$4+$5+$6) } ' | awk -F ":|-" '{print $1"\\t"$2"\\t"$3"\\t"$4}'> Sind_gc.txt
## 计算每个窗口基因条数
bedtools intersect -a genome.window.bed -b Sind.bed -c -F 0.1 > Sind_genecount.txt
## 计算每个窗口重复序列含量
bedtools coverage -a genome.window.bed -b repeat.gff | awk '{print $1 "\\t" $2 "\\t" $3 "\\t" $7}' > Sind_repeat.txt
## 生成共线性link文件
perl get_jcvi_block.pl Sind.bed Sind.Sind.anchors 8 \\ > Sind_links.txt

## 计算染色体长度
seqtk comp genome.fasta | \\
grep "^HiC_scaffold" |awk '{print $1"\\t"$2}' > genome.len  ##grep里的内容需要根据不同基因组更改
## 生成染色体文件 7列
awk '{print "chr\\t-\\t"$1"\\t"$1"\\t0\\t"$2"\\tchr"NR}' genome.len  > Sind_karyotype.txt
## 生成窗口文件, 窗口大小50Kb
bedtools makewindows -g genome.len -w 50000 > 50k.bed
bedtools nuc -fi ~/rna-seq/hic-final-green.fasta.masked.fasta -bed 50k.bed | cut -f 1-3,5 > 50k_gc.bed
##计算gc密度
grep -v "#" | awk -vFS="\\t" -vOFS="\\t" '{print $1,$2,$3,$4*($3-$2)}' 50k_gc.bed > gc.density.txt
##统计基因数目
grep "gene" hic-green.gff3 > genes.bed  ##gff3为当前基因组gff文件
bedtools intersect -a 50k.bed -b genes.bed -c > gene_counts.txt
## 计算每个窗口重复序列含量  -b后面接的是repeatmodel的gff结果
bedtools coverage -a 50k.bed -b hic-final-green.fasta.out.gff | awk '{print $1 "\\t" $2 "\\t" $3 "\\t" $7}' > Sind_repeat.txt
##同线性
##blast比对得到green.blast
makeblastdb -in Coccomyxa_subellipsoidea_filter.fasta -dbtype prot -parse_seqids -out green
blastp -query Coccomyxa_subellipsoidea_filter.fasta -db green -evalue 1e-5 -outfmt 6
##处理gff3文件得到green.gff
awk '$3 == "mRNA" {split($9, a, ";"); split(a[1], b, "="); print $1,b[2], $4, $5}' hic-green.gff3 > green.gff
sed -i 's/ \\+/\\t/g' green.gff
sed -i 's/HiC_scaffold_/chr/g' green.gff
 head green.gff 
chr1	g1.t1	1	2607
chr1	g2.t1	2785	3446
chr1	g3.t1	3596	4531
chr1	g4.t1	4669	5745
chr1	g5.t1	5819	9317
chr1	g6.t1	9410	11223
chr1	g7.t1	11253	12104
chr1	g8.t1	12351	14758
chr1	g9.t1	14826	16604
chr1	g10.t1	16693	17607

将green.blast和green.gff放入一个文件夹:plot中然后运行

MCScanX  ./plot/green  ##green 为文件的共同开头,例如 green.blast green.gff
然后进入/backup/liuli/geneomic/MCScanX-master/downstream_analyses文件夹运行
perl mkCircosInputs.pl ../green/plot/green.gff ../green/plot/green.collinearity
得到:green.links
head green.links 
chr1 71163 75540 chr1 333503 334960
chr1 96025 97311 chr1 348057 349415
chr1 101320 102354 chr1 350841 351950
chr1 110723 111646 chr1 361632 362555
chr1 111853 112080 chr1 362762 363031
chr1 112190 113063 chr1 363041 363975
chr1 113111 114211 chr1 364023 365123
chr1 114263 115441 chr1 365174 365971
chr1 115468 115677 chr1 366380 366589
chr1 115769 117727 chr1 366681 374309
如果运行perl mkCircosInputs.pl ../green/plot/green.gff ../green/plot/green.blast
得到:green.tandem
head green.tandem 
g10015.t1,g10016.t1
g10070.t1,g10071.t1
g10118.t1,g10119.t1
g10119.t1,g10120.t1
g10120.t1,g10121.t1
g10121.t1,g10122.t1
g10122.t1,g10123.t1
g10123.t1,g10124.t1
g10124.t1,g10125.t1
g10125.t1,g10126.t1

##共线性
python -m jcvi.formats.gff bed --type=mRNA --key=ID  /public1/home/t0s000552/green/braker3/hic-green.gff3 -o ./fchra.bed
python3 -m jcvi.formats.bed uniq fchra.bed
gffread /public1/home/t0s000552/green/braker3/hic-green.gff3 -g ~/rna-seq/hic-final-green.fasta.masked.fasta -x fchra.cds.fa
python -m jcvi.formats.fasta format fchra.cds.fa fchra.cds
python -m jcvi.compara.catalog ortholog fchra fchra --cscore=.99 --no_strip_names --notex
python jcvi_simple2links.py fchra.fchra.anchors > fchrab.anchors.link.txt

nucmer --prefix=ref_qry fchra.cds.fa fchra.cds.fa  ##cds比对
delta-filter -q ref_qry.delta > ref_qry.filter
show-coords -rcl ref_qry.delta > ref_qry.coords
awk '$10>60, OFS="\\t"{print $18,$19}' ref_qry.coords > req_qry.50k.LinkedRegion.info

awk '$3 == "mRNA" {split($9, a, ";"); split(a[1], b, "="); print b[2],$1, $4, $5}' hic-green.gff3 > gene_positions.txt

# 将基因位置与 req_qry.50k.LinkedRegion.info 文件中的基因名进行匹配
awk 'NR==FNR {pos[$1] = $2 "\\t" $3 "\\t" $4; next} 
{
    gene1_pos = pos[$1] ? pos[$1] : "NA\\tNA\\tNA"
    gene2_pos = pos[$2] ? pos[$2] : "NA\\tNA\\tNA"
    print $1 "\\t" gene1_pos "\\t" $2 "\\t" gene2_pos
}' gene_positions.txt req_qry.50k.LinkedRegion.info > genes_with_positions.txt

R绘图

options("repos" = c(CRAN="<http://mirrors.tuna.tsinghua.edu.cn/CRAN/>"))
if (! require(circlize)) {
  install.packages('circlize')
}
library(circlize)

##数据示例                           
> head(chr_length) 
V7 V5      V6
1 chr1  0 4690352
2 chr2  0 2412435
3 chr3  0 1545349
4 chr4  0 2110340
5 chr5  0 1628745
6 chr6  0 2105321

> head(gc.density)    ##有时候需要对第四列进行处理,v4/窗口值
V1     V2     V3      V4
1 chr1      0  50000 0.68880
2 chr1  50000 100000 0.66010
3 chr1 100000 150000 0.64856
4 chr1 150000 200000 0.64148
5 chr1 200000 250000 0.63864
6 chr1 250000 300000 0.62798

> head(gene_counts)  ##有时候需要对第四列进行处理,v4/窗口值
V1     V2     V3      V4
1 chr1      0  50000 0.00066
2 chr1  50000 100000 0.00068
3 chr1 100000 150000 0.00090
4 chr1 150000 200000 0.00074
5 chr1 200000 250000 0.00076
6 chr1 250000 300000 0.00094

> head(Sind_repeat)  ##有时候需要对第四列进行处理,v4/窗口值
V1     V2     V3      V4
1 chr1      0  50000 0.03406
2 chr1  50000 100000 0.01922
3 chr1 100000 150000 0.02328
4 chr1 150000 200000 0.00598
5 chr1 200000 250000 0.00300
6 chr1 250000 300000 0.00272

> head(bed1)  ##需要对染色体进行着色就在对应的第四列添加颜色代码
V2      V3      V4   color
1 chr1   16693   17607 #8b89b0
2 chr1 1278576 1279085 #8b89b0
3 chr2  415145  416878 #f4436d
4 chr2  418770  421770 #f4436d
5 chr2  426005  428843 #f4436d
6 chr2  430324  433922 #f4436d
> head(bed2)
V6      V7      V8   color
1 chr1   16693   17607 #8b89b0
2 chr1 1278576 1279085 #8b89b0
3 chr2  415145  416878 #f4436d
4 chr2  418770  421770 #f4436d
5 chr2  426005  428843 #f4436d
6 chr2  430324  433922 #f4436d

circos.clear()

##设置数据
link_data[, c(2,3,5,6)] <- lapply(link_data[, c(2,3,5,6)], as.numeric)
bed1 <- link_data[,1:3]
bed2 <- link_data[,4:6]
circos.clear()

# 设置 Circos 参数
gap.after <- c(rep(5, 19), 15)  # 这里假设有20条染色体,chr1到chr19间隔为5,chr20到chr1间隔为15
circos.par("gap.after" = gap.after)

# 初始化 Circos 图
circos.genomicInitialize(chr_length, track.height = 0.03)

# 绘制 GC 含量折线图
circos.genomicTrackPlotRegion(gc.density, panel.fun = function(region, value, ...) {
  circos.genomicLines(region, value, col = "#c6e770", lwd = 2, ...)
}, track.height = 0.1, bg.border = NA)  ## bg.border = NA添加边界  track.height设置大小

# 绘制基因计数柱状图
circos.genomicTrackPlotRegion(gene_counts, panel.fun = function(region, value, ...) {
  circos.genomicLines(region, value, type = "h", col = "#f4436d", lwd = 2, ...)
}, track.height = 0.1, bg.border = NA)

# 绘制重复序列热图
circos.genomicTrackPlotRegion(Sind_repeat, panel.fun = function(region, value, ...) {
  circos.genomicRect(region, value, col = colorRamp2(c(0, 1), c("pink", "blue"))(value), border = NA, ...)
}, track.height = 0.1, bg.border = NA)

# 绘制基因组链接
circos.genomicLink(bed1, bed2, col = bed1$color)

# 清理 Circos 图
circos.clear()
**另一个种表现形式**
 # 设置 Circos 参数,调整画布比例为扁平形状
circos.par(
  canvas.xlim = c(-2, 2),  # x 方向范围拉宽
  canvas.ylim = c(-1, 1),  # y 方向范围压扁
  cell.padding = c(0, 0, 0, 0),
  gap.degree = 5           # 调整染色体间距
)
##绘制染色体
circos.initialize(
  factors = chr_length$V7,
  xlim = cbind(chr_length$V5, chr_length$V6)
)
circos.track(
  ylim = c(0, 1),
  panel.fun = function(x, y) {
    sector_index <- CELL_META$sector.index
    circos.rect(
      CELL_META$xlim[1], 0, CELL_META$xlim[2], 1,
      col = "#b2cee6", border = NA
    )
    circos.text(
      x = CELL_META$xcenter,
      y = 1.5,
      labels = sector_index,
      facing = "bending.inside",
      adj = c(0.5, 0.5),
      cex = 1
    )
  },
  track.height = 0.05  # 减小轨道高度,使图更扁平
)

circos.genomicTrackPlotRegion(gc.density2, panel.fun = function(region, value, ...) 
{circos.genomicRect(region, value, col = colorRamp2(c(0.0050, 0.5240, 0.7298), c("white", "#FDDED7", "red"))(value), border = NA, ...)}, track.height = 0.1)

col_fun <- colorRamp2(
  c(-1, 0, 1), 
  c("#ef8a62", "#f7f7f7", "#67a9cf")
)
circos.genomicHeatmap(
  bed, col = col_fun, side = "inside", 
  border = "white"
)