library(caret)
library(MLSeq)
# Name of the metadata column that holds the class label of each sample.
group <- "group"

# NOTE(review): the downstream code passes train_data as DESeq2 `countData`
# and transposes test_data before handing it to caret, so expr_mat is treated
# here as genes (rows) x samples (columns), with columns in the same order as
# the rows of `metadata`. Fail early if that does not hold.
stopifnot(
  "expr_mat must have one column per row of metadata (genes x samples)" =
    ncol(expr_mat) == length(metadata[[group]])
)

# Stratified 75% / 25% split on the class label. With list = FALSE,
# createDataPartition() returns a one-column matrix; flatten it so it can be
# used as a plain index vector.
train_index <- as.vector(
  createDataPartition(metadata[[group]], p = 0.75, list = FALSE)
)

# Samples are columns, so the split is applied to columns; drop = FALSE keeps
# the two-dimensional shape even if only one sample lands in a partition.
train_data <- expr_mat[, train_index, drop = FALSE]
train_data_group <- metadata[[group]][train_index]
# Alternative that keeps every metadata column:
# train_data_group <- metadata[train_index, , drop = FALSE]
test_data <- expr_mat[, -train_index, drop = FALSE]
test_data_group <- metadata[[group]][-train_index]
# Alternative that keeps every metadata column:
# test_data_group <- metadata[-train_index, , drop = FALSE]

# Round to whole numbers before the values are used as count data.
train_data <- round(train_data)
test_data <- round(test_data)
# DESeqDataSetFromMatrix() needs `colData` to be a data frame with one row per
# column of `countData`; a bare label vector is rejected. Wrap the labels in a
# one-column data frame named after `group`.
train_col_data <- data.frame(as.factor(train_data_group))
names(train_col_data) <- group
test_col_data <- data.frame(as.factor(test_data_group))
names(test_col_data) <- group

# Training and test sets as DESeqDataSet objects. The intercept-only design
# is kept from the original call. The function is namespace-qualified because
# DESeq2 is not attached until later in this script.
data2 <- DESeq2::DESeqDataSetFromMatrix(
  countData = train_data,
  colData = train_col_data,
  design = ~ 1
)
data3 <- DESeq2::DESeqDataSetFromMatrix(
  countData = test_data,
  colData = test_col_data,
  design = ~ 1
)
# Random forest fitted on the training DESeqDataSet (`data2`) with
# "deseq-vst" preprocessing, tuned by 5-fold cross-validation repeated twice.
# `ref = "T"` names the reference class, so the labels must contain a "T"
# level.
rf <- classify(
  data = data2,
  method = "rf",
  preProcessing = "deseq-vst",
  ref = "T",
  control = trainControl(
    method = "repeatedcv",
    number = 5,
    repeats = 2,
    classProbs = TRUE
  )
)

# Predict through the MLSeq object so the test counts (`data3`) receive the
# same preprocessing as the training data. Calling predict() on the inner
# caret model with raw counts would skip that step.
# NOTE(review): `type = "prob"` is expected to be forwarded through `...` to
# caret's predict method - confirm against the installed MLSeq version.
prediction_prob <- predict(rf, data3, type = "prob")
## Not run:
library(DESeq2)
# Load the cervical data set.
data(cervical)

# Keep only the first 150 features.
data <- cervical[seq_len(150), ]

# Sample classes: 29 "N" labels followed by 29 "T" labels.
class <- data.frame(condition = factor(rep(c("N", "T"), each = 29)))

n <- ncol(data)  # number of samples
p <- nrow(data)  # number of features

# Hold out 30% of the samples (rounded up) for testing; the rest is training.
nTest <- ceiling(0.3 * n)
ind <- sample(n, size = nTest, replace = FALSE)

# Training set: drop the held-out columns, add 1 to every count and coerce
# the result to a matrix.
data.train <- as.matrix(data[, -ind] + 1)
classtr <- data.frame(condition = class$condition[-ind])
# Differential expression on the training counts with DESeq2.
# The class labels go into a one-column data frame named after `group`, and
# the design uses that column. An intercept-only design (~ 1) would make
# results() report the intercept instead of a between-group fold change.
train_col_data <- data.frame(as.factor(train_data_group))
names(train_col_data) <- group
data2 <- DESeqDataSetFromMatrix(
  countData = train_data,
  colData = train_col_data,
  design = reformulate(group)
)
ddf2 <- DESeq(data2)
resultsNames(ddf2)

# results() defaults to the last coefficient of the design; the sign of the
# fold change depends on the factor level order of the group column.
res <- results(ddf2)

# Significant genes: adjusted p-value below 0.05 and |log2 fold change| > 2,
# then split by direction.
diff_gene_deseq2 <- subset(res, padj < 0.05 & abs(log2FoldChange) > 2)
up_deg <- subset(res, padj < 0.05 & log2FoldChange > 2)
down_deg <- subset(res, padj < 0.05 & log2FoldChange < -2)

# Number of genes in each set.
nrow(diff_gene_deseq2)
nrow(up_deg)
nrow(down_deg)

# Convert explicitly to a base data frame before writing.
write.csv(as.data.frame(diff_gene_deseq2), "D:/as.csv")
## Number of repeats (repeats) might change model accuracies
## 1. caret-based classifiers:
# Training set of the cervical example in S4 (DESeqDataSet) form, built from
# the count matrix `data.train` and its class labels `classtr`. classify()
# below needs this object, which was never created before.
data.trainS4 <- DESeqDataSetFromMatrix(
  countData = data.train,
  colData = classtr,
  design = ~ 1
)

# Random Forest (RF) Classification
# "deseq-vst" preprocessing, 5-fold cross-validation repeated twice, with "T"
# as the reference class.
rf <- classify(
  data = data.trainS4,
  method = "rf",
  preProcessing = "deseq-vst",
  ref = "T",
  control = trainControl(
    method = "repeatedcv",
    number = 5,
    repeats = 2,
    classProbs = TRUE
  )
)