# KEGG annotation summary script: downloads KEGG htext JSON hierarchies
# (the ko00001 reference and per-sample q00001.keg results), flattens them
# with tidyr::unnest(), tabulates the second hierarchy level and plots it.
library(rjson)
library(jsonlite)
library(tidyverse)
## KEGG hierarchy tables ----
# Every dataset below comes from a KEGG "download_htext" JSON document: the
# ko00001 reference hierarchy plus one q00001.keg result per sample.
#
# The helper names start with a dot on purpose: `ls()` does not list such
# names by default, and this script later uses `ls()` to enumerate the dataset
# objects in the workspace.

# Download one htext JSON document and flatten its nested `children` levels
# into a four-column table.
#
# url       Plain http(s) URL of the JSON download. It must not be wrapped in
#           `<...>`: jsonlite only downloads strings that start with
#           http:// or https:// and otherwise tries to parse the string
#           itself as JSON.
# last_col  Name given to the fourth (deepest) column.
#
# Returns the flattened table with columns L1, L2, L3 and `last_col`.
.read_kegg_htext <- function(url, last_col = "L4") {
  # jsonlite is named explicitly because rjson is attached as well and also
  # exports a `fromJSON()`; the unnest() steps below address the
  # `children.*` columns obtained from jsonlite's simplified result.
  htext <- jsonlite::fromJSON(url)
  htext$name <- NULL # drop the top-level `name` field, keep `children`

  # Each unnest() call expands one more level of nested `children`.
  # `tidyr_legacy` renames the repeated `name` columns to name, name1, ...
  flat <- as.data.frame(htext) %>%
    unnest(
      cols = c("children.name", "children.children"),
      names_repair = tidyr_legacy
    ) %>%
    unnest(
      cols = c("children.name", "name", "children"),
      names_repair = tidyr_legacy
    ) %>%
    unnest(
      cols = c("children.name", "name", "name1", "children"),
      names_repair = tidyr_legacy
    )

  colnames(flat) <- c("L1", "L2", "L3", last_col)
  flat
}

# Build the download URL of one KAAS result from its numeric result id.
# NOTE(review): these look like per-job result directories on the server;
# confirm that the ids are still available before relying on them.
.kaas_result_url <- function(result_id) {
  paste0(
    "https://www.genome.jp/kegg-bin/download_htext",
    "?htext=q00001.keg&format=json",
    "&filedir=/tools/kaas/files/log/result/",
    result_id
  )
}

## KO: reference hierarchy (fourth column is named "KO" instead of "L4")
KO <- .read_kegg_htext(
  "https://www.kegg.jp/kegg-bin/download_htext?htext=ko00001&format=json&filedir=",
  last_col = "KO"
)

## One q00001.keg result per sample
LED <- .read_kegg_htext(.kaas_result_url("1713248897")) # Led
GJU <- .read_kegg_htext(.kaas_result_url("1713254631")) # Gju
HCO <- .read_kegg_htext(.kaas_result_url("1713267466")) # Hco
HSU <- .read_kegg_htext(.kaas_result_url("1713318401")) # Hsu
JAR <- .read_kegg_htext(.kaas_result_url("1713321273")) # Jar
PCO <- .read_kegg_htext(.kaas_result_url("1713338378")) # Pco
PSU <- .read_kegg_htext(.kaas_result_url("1713344822")) # Psu
RBU <- .read_kegg_htext(.kaas_result_url("1713406583")) # Rbu
SCO <- .read_kegg_htext(.kaas_result_url("1713463152")) # Sco
SRU <- .read_kegg_htext(.kaas_result_url("1713465924")) # Sru
LH <- .read_kegg_htext(.kaas_result_url("1713750171")) # Lh
HRU <- .read_kegg_htext(.kaas_result_url("1713752751")) # Hru
HPS <- .read_kegg_htext(.kaas_result_url("1713774334")) # Hps
HPA <- .read_kegg_htext(.kaas_result_url("1713776290")) # Hpa
PCA <- .read_kegg_htext(.kaas_result_url("1713835973")) # Pca
green <- .read_kegg_htext(.kaas_result_url("1719889837")) # green

# Frequency of each value in the second column (L2) of `green`.
green_2 <- as.data.frame(table(green[, 2]), stringsAsFactors = FALSE)
# Names of the dataset objects to process. `ls()` lists every object in the
# workspace, so keep only data frames (anything else cannot be indexed with
# `[, 2]` below) and skip `*_table` outputs of a previous run so that
# re-running the script does not produce `*_table_table` objects.
data <- ls()
data <- data[vapply(mget(data), is.data.frame, logical(1))]
data <- data[!endsWith(data, "_table")]

## get(): look up an object by the name stored in a character string.
## Replace every dataset with a copy that has the rows containing NA removed.
for (name in data) {
  assign(name, na.omit(get(name)))
}

## assign(): create a variable whose name is built inside the loop.
## For each dataset `X`, store the frequency table of its second column
## as `X_table`.
for (name in data) {
  df <- get(name)
  assign(
    paste0(name, "_table"),
    as.data.frame(table(df[, 2]), stringsAsFactors = FALSE)
  )
}
## Stacked bar chart: shared theme ----
# Black-and-white base theme with larger text, no x-axis title, and the
# legend placed on top without a title. The object is stored under the name
# `theme`; the function is therefore called as `ggplot2::theme()` here so it
# is obvious which of the two is meant.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line elements - confirm against the installed version before changing it.
theme <- theme_bw() +
  ggplot2::theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    axis.line = element_line(size = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(colour = "black", size = 18),
    axis.text.x = element_text(colour = "black", hjust = 0.5, size = 18),
    axis.text.y = element_text(colour = "black", size = 10),
    legend.position = "top",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "transparent"),
    legend.text = element_text(size = 20)
  )
# Horizontal stacked bar chart of `num` per pathway, split by species.
# position = "fill" rescales every bar to the same total length, so the
# segments show proportions; the labels still print the raw `num` values,
# centred in their segment. Both factors keep the order in which their
# values first appear in the data.
# The plot is not assigned to a variable. The two fill colours are a yellow
# ("#f2d477") and a green ("#78e08f").
ggplot(
  green_kegg_path,
  aes(
    x = factor(pathway, levels = unique(pathway)),
    y = num,
    fill = factor(species, levels = unique(species))
  )
) +
  geom_col(position = "fill") +
  geom_text(
    aes(label = num),
    position = position_fill(vjust = 0.5),
    colour = "black",
    size = 3
  ) +
  scale_fill_manual(values = c("#f2d477", "#78e08f")) +
  coord_flip() +
  labs(
    x = "pathway", # x-axis title
    y = "num", # y-axis title
    fill = "type" # legend title
  ) +
  theme
Options:
-a <ANALYSES> Optional, comma separated list of analyses. If this option
is not set, ALL analyses will be run.
-b <OUTPUT-FILE-BASE> Optional, base output filename (relative or absolute path).
Note that this option, the output directory (-d) option and
the output file name (-o) option are mutually exclusive. The
appropriate file extension for the output format(s) will be
appended automatically. By default the input file
path/name will be used.
-d <OUTPUT-DIR> Optional, output directory. Note that this option, the
output file name (-o) option and the output file base (-b) option
are mutually exclusive. The output filename(s) are the
same as the input filename, with the appropriate file
extension(s) for the output format(s) appended automatically.
-c Optional. Disables use of the precalculated match lookup
service. All match calculations will be run locally.
-C Optional. Supply the number of cpus to use.
-e Optional, excludes sites from the XML, JSON output
-f <OUTPUT-FORMATS> Optional, case-insensitive, comma separated list of output
formats. Supported formats are TSV, XML, JSON, GFF3. Default
for protein sequences are TSV, XML and
GFF3, or for nucleotide sequences GFF3 and XML.
-g Optional, switch on lookup of corresponding Gene Ontology
annotation (IMPLIES -l lookup option)
-h Optional, display help information
-i <INPUT-FILE-PATH> Optional, path to fasta file that should be loaded on
Master startup. Alternatively, in CONVERT mode, the
InterProScan 5 XML file to convert.
-l Also include lookup of corresponding InterPro
annotation in the TSV and GFF3 output formats.
-m <MINIMUM-SIZE> Optional, minimum nucleotide size of ORF to report. Will
only be considered if n is specified as a sequence type.
Be aware that specifying too small a value may make the
analysis take a very long time!
-o <EXPLICIT_OUTPUT_FILENAME> Optional explicit output file name (relative or absolute
path). Note that this option, the output directory -d option
and the output file basename -b option are mutually
exclusive. If this option is given, you MUST specify a
single output format using the -f option. The output file
name will not be modified. Note that specifying an output
file name using this option OVERWRITES ANY EXISTING FILE.
-p Optional, switch on lookup of corresponding Pathway
annotation (IMPLIES -l lookup option)
-t <SEQUENCE-TYPE> Optional, the type of the input sequences (dna/rna (n)
or protein (p)). The default sequence type is protein.
-T <TEMP-DIR> Optional, specify temporary file directory (relative or
absolute path). The default location is temp/.
-v Optional, verbose log output
-r Optional. 'Mode' required ( -r 'cluster') to run in cluster mode. These options
are provided but have not been tested with this wrapper script. For
more information on running InterProScan in cluster mode:
<https://github.com/ebi-pf-team/interproscan/wiki/ClusterMode>
-R Optional. Clusterrunid (crid) required when using cluster mode.
-R unique_id
Available analyses:
TIGRFAM (XX.X) : TIGRFAMs are protein families based on hidden Markov models (HMMs).
SFLD (X) : SFLD is a database of protein families based on hidden Markov models (HMMs).
SUPERFAMILY (X.XX) : SUPERFAMILY is a database of structural and functional annotations for all proteins and genomes.
PANTHER (XX.X) : The PANTHER (Protein ANalysis THrough Evolutionary Relationships) Classification System is a unique resource that classifies genes by their functions, using published scientific experimental evidence and evolutionary relationships to predict function even in the absence of direct experimental evidence.
Gene3D (X.X.X) : Structural assignment for whole genes and genomes using the CATH domain structure database.
Hamap (XXXX_XX) : High-quality Automated and Manual Annotation of Microbial Proteomes.
ProSiteProfiles (XXX_XX) : PROSITE consists of documentation entries describing protein domains, families and functional sites as well as associated patterns and profiles to identify them.
Coils (X.X.X) : Prediction of coiled coil regions in proteins.
SMART (X.X) : SMART allows the identification and analysis of domain architectures based on hidden Markov models (HMMs).
CDD (X.XX) : CDD predicts protein domains and families based on a collection of well-annotated multiple sequence alignment models.
PRINTS (XX.X) : A compendium of protein fingerprints - a fingerprint is a group of conserved motifs used to characterise a protein family.
PIRSR (XXXX_XX) : PIRSR is a database of protein families based on hidden Markov models (HMMs) and Site Rules.
ProSitePatterns (XXXX_XX) : PROSITE consists of documentation entries describing protein domains, families and functional sites as well as associated patterns and profiles to identify them.
AntiFam (X.X) : AntiFam is a resource of profile-HMMs designed to identify spurious protein predictions.
Pfam (XX.X) : A large collection of protein families, each represented by multiple sequence alignments and hidden Markov models (HMMs).
MobiDBLite (X.X) : Prediction of intrinsically disordered regions in proteins.
PIRSF (X.XX) : The PIRSF concept is used as a guiding principle to provide comprehensive and non-overlapping clustering of UniProtKB sequences into a hierarchical order to reflect their evolutionary relationships.
OPTIONS FOR XML PARSER OUTPUTS
-F <IPRS output directory> This is the output directory from InterProScan.
-D <database> Supply the database responsible for these annotations.
-x <taxon> NCBI taxon ID of the organism being annotated
-y <type> Transcript or protein
-n <biocurator> Name of the biocurator who made these annotations
-M <mapping file> Optional. Mapping file.
-B <bad seq file> Optional. Bad input sequence file.