KEGG | Notion

# Export residue: the page preview repeated the first part of the script here (the KO and LED import steps: fromJSON(), three unnest() calls, colnames()) in garbled form. The intact code follows below.

library(rjson)
library(jsonlite)
library(tidyverse)
##PCA
KO <- fromJSON("<https://www.kegg.jp/kegg-bin/download_htext?htext=ko00001&format=json&filedir=>")#下载并解析JSON文件
KO$name <- NULL
KO <- as.data.frame(KO) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(KO) <- c("L1","L2","L3","KO") 

##Led
LED <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713248897>")#下载并解析JSON文件
LED$name <- NULL
LED <- as.data.frame(LED) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(LED) <- c("L1","L2","L3","L4")

##Gju
GJU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713254631>")#下载并解析JSON文件
GJU$name <- NULL
GJU <- as.data.frame(GJU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(GJU) <- c("L1","L2","L3","L4")
##Hco
HCO <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713267466>")#下载并解析JSON文件
HCO$name <- NULL
HCO <- as.data.frame(HCO) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(HCO) <- c("L1","L2","L3","L4")
##Hsu 1713318401
HSU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713318401>")#下载并解析JSON文件
HSU$name <- NULL
HSU <- as.data.frame(HSU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(HSU) <- c("L1","L2","L3","L4")
##Jar
JAR <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713321273>")#下载并解析JSON文件
JAR$name <- NULL
JAR <- as.data.frame(JAR) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(JAR) <- c("L1","L2","L3","L4")
##Pco
PCO <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713338378>")#下载并解析JSON文件
PCO$name <- NULL
PCO <- as.data.frame(PCO) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(PCO) <- c("L1","L2","L3","L4")
##Psu
PSU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713344822>")#下载并解析JSON文件
PSU$name <- NULL
PSU <- as.data.frame(PSU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(PSU) <- c("L1","L2","L3","L4")
##Rbu
RBU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713406583>")#下载并解析JSON文件
RBU$name <- NULL
RBU <- as.data.frame(RBU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(RBU) <- c("L1","L2","L3","L4")
##Sco
SCO <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713463152>")#下载并解析JSON文件
SCO$name <- NULL
SCO <- as.data.frame(SCO) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(SCO) <- c("L1","L2","L3","L4")
##Sru
SRU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713465924>")#下载并解析JSON文件
SRU$name <- NULL
SRU <- as.data.frame(SRU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(SRU) <- c("L1","L2","L3","L4")
##Lh
LH <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713750171>")#下载并解析JSON文件
LH$name <- NULL
LH <- as.data.frame(LH) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(LH) <- c("L1","L2","L3","L4")
##Hru
HRU <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713752751>")#下载并解析JSON文件
HRU$name <- NULL
HRU <- as.data.frame(HRU) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(HRU) <- c("L1","L2","L3","L4")
##Hps
HPS <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713774334>")#下载并解析JSON文件
HPS$name <- NULL
HPS <- as.data.frame(HPS) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(HPS) <- c("L1","L2","L3","L4")
##Hpa
HPA <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713776290>")#下载并解析JSON文件
HPA$name <- NULL
HPA <- as.data.frame(HPA) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(HPA) <- c("L1","L2","L3","L4")
##Pca
PCA <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1713835973>")#下载并解析JSON文件
PCA$name <- NULL
PCA <- as.data.frame(PCA) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(PCA) <- c("L1","L2","L3","L4")

##green
green <- fromJSON("<https://www.genome.jp/kegg-bin/download_htext?htext=q00001.keg&format=json&filedir=/tools/kaas/files/log/result/1719889837>")#下载并解析JSON文件
green$name <- NULL
green <- as.data.frame(green) %>% 
  unnest(cols = c("children.name","children.children"),names_repair = tidyr_legacy) %>%#重要函数
  unnest(cols = c("children.name","name","children"),names_repair = tidyr_legacy) %>%
  unnest(cols = c("children.name","name","name1","children"),names_repair = tidyr_legacy)
colnames(green) <- c("L1","L2","L3","L4")
green_2 <- as.data.frame(table(green[, 2]), stringsAsFactors = FALSE)

data <- ls()
##get：将向量变成变量名
for (name in data) {
       assign(name, na.omit(get(name)))}
##assign用法：再for循环里分配新的变量名字
for (name in data) {
       df <- get(name)
       assign(paste0(name, "_table"), as.data.frame(table(df[, 2]), stringsAsFactors = FALSE))
   }

##堆砌图

theme <- theme_bw() +
  theme(axis.text.x = element_text(colour ="black",hjust = 0.5,size=18), 
        axis.text.y=element_text(colour ="black",size = 10),
        axis.title.x = element_blank(),
        axis.title.y = element_text(colour ="black",size = 18), 
        axis.line = element_line(size=1),
        plot.title = element_text(hjust = 0.5,size =  20),
        legend.position = "top",
        legend.title = element_blank(),
        legend.background = element_rect(fill = 'transparent'),
        legend.text = element_text(size = 20))

ggplot(green_kegg_path, aes(x=factor(pathway, levels=unique(pathway)), 
                            y=num, 
                            fill=factor(species, levels=unique(species)))) +
  labs(
    x = "pathway",   # 调整x轴名称
    y = "num",       # 调整y轴名称
    fill = "type"    # 调整图例名称
  ) +
  coord_flip() + 
  geom_bar(position="fill", stat="identity") +
  scale_fill_manual(values = c("#f2d477","#78e08f"))+geom_text(aes(label=num), position=position_fill(vjust=0.5), color="black", size=3)+theme

# 执行上述代码后，p就是修改后的绘图对象，填充颜色为黄色和绿色

Options:
  -a  <ANALYSES>			            Optional, comma separated list of analyses.  If this option
is not set, ALL analyses will be run.

-b <OUTPUT-FILE-BASE>   		    Optional, base output filename (relative or absolute path).
Note that this option, the output directory (-d) option and
the output file name (-o) option are mutually exclusive.  The
appropriate file extension for the output format(s) will be
appended automatically. By default the input file
path/name will be used.

-d <OUTPUT-DIR>              		    Optional, output directory. Note that this option, the
output file name (-o) option and the output file base (-b) option
are mutually exclusive. The output filename(s) are the
same as the input filename, with the appropriate file
extension(s) for the output format(s) appended automatically.

-c		                            Optional.  Disables use of the precalculated match lookup
service.  All match calculations will be run locally.

-C					    Optional. Supply the number of cpus to use.

-e               			    Optional, excludes sites from the XML, JSON output

-f <OUTPUT-FORMATS>             	    Optional, case-insensitive, comma separated list of output
formats. Supported formats are TSV, XML, JSON, GFF3. Default 
for protein sequences are TSV, XML and
GFF3, or for nucleotide sequences GFF3 and XML.

-g		                            Optional, switch on lookup of corresponding Gene Ontology
annotation (IMPLIES -l lookup option)

-h	                                    Optional, display help information

-i <INPUT-FILE-PATH>               	    Optional, path to fasta file that should be loaded on
Master startup. Alternatively, in CONVERT mode, the
InterProScan 5 XML file to convert.

-l                     		    Also include lookup of corresponding InterPro
annotation in the TSV and GFF3 output formats.

-m <MINIMUM-SIZE>               	    Optional, minimum nucleotide size of ORF to report. Will
only be considered if n is specified as a sequence type.
Please be aware of the fact that if you specify a too
short value it might be that the analysis takes a very long
time!
  
-o <EXPLICIT_OUTPUT_FILENAME>    	    Optional explicit output file name (relative or absolute
path).  Note that this option, the output directory -d option
and the output file basename -b option are mutually
exclusive. If this option is given, you MUST specify a
single output format using the -f option.  The output file
name will not be modified. Note that specifying an output
file name using this option OVERWRITES ANY EXISTING FILE.

-p                             	    Optional, switch on lookup of corresponding Pathway
annotation (IMPLIES -l lookup option)
-t <SEQUENCE-TYPE>              	    Optional, the type of the input sequences (dna/rna (n)
or protein (p)).  The default sequence type is protein.

-T <TEMP-DIR>                  	    Optional, specify temporary file directory (relative or
absolute path). The default location is temp/.

-v                       		    Optional, verbose log output

-r					    Optional. 'Mode' required ( -r 'cluster') to run in cluster mode. These options
are provided but have not been tested with this wrapper script. For
more information on running InterProScan in cluster mode: 
  <https://github.com/ebi-pf-team/interproscan/wiki/ClusterMode>

-R					    Optional. Clusterrunid (crid) required when using cluster mode.
-R unique_id 
Available analyses:
  TIGRFAM (XX.X) : TIGRFAMs are protein families based on hidden Markov models (HMMs).
SFLD (X) : SFLD is a database of protein families based on hidden Markov models (HMMs).
SUPERFAMILY (X.XX) : SUPERFAMILY is a database of structural and functional annotations for all proteins and genomes.
PANTHER (XX.X) : The PANTHER (Protein ANalysis THrough Evolutionary Relationships) Classification System is a unique resource that classifies genes by their functions, using published scientific experimental evidence and evolutionary relationships to predict function even in the absence of direct experimental evidence.
Gene3D (X.X.X) : Structural assignment for whole genes and genomes using the CATH domain structure database.
Hamap (XXXX_XX) : High-quality Automated and Manual Annotation of Microbial Proteomes.
ProSiteProfiles (XXX_XX) : PROSITE consists of documentation entries describing protein domains, families and functional sites as well as associated patterns and profiles to identify them.
Coils (X.X.X) : Prediction of coiled coil regions in proteins.
SMART (X.X) : SMART allows the identification and analysis of domain architectures based on hidden Markov models (HMMs).
CDD (X.XX) : CDD predicts protein domains and families based on a collection of well-annotated multiple sequence alignment models.
PRINTS (XX.X) : A compendium of protein fingerprints - a fingerprint is a group of conserved motifs used to characterise a protein family.
PIRSR (XXXX_XX) : PIRSR is a database of protein families based on hidden Markov models (HMMs) and Site Rules.
ProSitePatterns (XXXX_XX) : PROSITE consists of documentation entries describing protein domains, families and functional sites as well as associated patterns and profiles to identify them.
AntiFam (X.X) : AntiFam is a resource of profile-HMMs designed to identify spurious protein predictions.
Pfam (XX.X) : A large collection of protein families, each represented by multiple sequence alignments and hidden Markov models (HMMs).
MobiDBLite (X.X) : Prediction of intrinsically disordered regions in proteins.
PIRSF (X.XX) : The PIRSF concept is used as a guiding principle to provide comprehensive and non-overlapping clustering of UniProtKB sequences into a hierarchical order to reflect their evolutionary relationships.

OPTIONS FOR XML PARSER OUTPUTS

-F <IPRS output directory> 		This is the output directory from InterProScan.
-D <database>				Supply the database responsible for these annotations.
-x <taxon>				NCBI taxon ID of the ID being annotated
-y <type>				Transcript or protein
-n <biocurator>				Name of the biocurator who made these annotations
-M <mapping file>			Optional. Mapping file.
-B <bad seq file>			Optional. Bad input sequence file.