513 lines
17 KiB
R

if (!require("dplyr")) install.packages("dplyr")
library(dplyr)
# create a phenotype vector
# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
# intersect %in% with the RNA-Seq data
phenos.df <- subject.attrs %>%
filter(X %in% colnames(sense.filtered.cpm)) %>%
dplyr::select(X, Diag)
colnames(phenos.df) # $Diag is mdd diagnosis
# grab Diag column and convert character to factor
mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector
summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
#### Part B: Normalization
## 1: log2 transformation
# raw cpm boxplots and histogram of one sample
boxplot(sense.filtered.cpm,range=0,
ylab="raw probe intensity", main="Raw", names=mddPheno)
hist(sense.filtered.cpm[,1], freq=F, ylab="density",
xlab="raw probe intensity", main="Raw Data Density for Sample 1")
# log2 transformed boxplots and histogram
boxplot(log2(sense.filtered.cpm), range=0,
ylab="log2 intensity", main="Log2 Transformed", names=mddPheno)
hist(log2(sense.filtered.cpm[,1]), freq=F, ylab="density",
xlab="log2 probe intensity", main="log2 Data Density for Sample 1")
getmode <- function(v) {
uniqv <-unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
mean(log2(sense.filtered.cpm[,1]))),
Mode = c(getmode(sense.filtered.cpm[,1]),
getmode(log2(sense.filtered.cpm[,1]))),
Median = c(median(sense.filtered.cpm[,1]))
)
rownames(data) = c("Original", "Log2 Transformed")
data
## 2: Quantile Normalization
# install quantile normalize
#install.packages("BiocManager")
if (!require("BiocManager")) install.packages("BiocManager")
library(BiocManager)
if (!require("preprocessCore"))
BiocManager::install("preprocessCore")
library(preprocessCore) # replace with custom function?
# apply quantile normalization
mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm)
boxplot(mddExprData_quantile,range=0,ylab="raw intensity", main="Quantile Normalized")
sapply(mddExprData_quantile, function(x) quantile(x, probs = seq(0, 1, 1/4)))
quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))
length(mddExprData_quantile)
sapply(mddExprData_quantile, function(x) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
head(mddExprData_quantile)
head(mddExprData_quantile)
head(mddExprData_quantile[,3])
head(mddExprData_quantile[,1-3])
head(mddExprData_quantile)
### 3: Log2 on quantile normalized data
mddExprData_quantileLog2 <- log2(mddExprData_quantile)
# add phenotype names to matrix
colnames(mddExprData_quantileLog2) <- mddPheno
boxplot(mddExprData_quantileLog2,range=0,
ylab="log2 intensity", main="Quantile Normalized Log2")
hist(log2(mddExprData_quantileLog2[,1]), freq=F,
ylab="density", xlab="log2 probe intensity",
main="log2 Quantile Normalized for Sample 1")
## 4: Means
mean(mddExprData_quantileLog2[,1])
colMeans(mddExprData_quantileLog2)
## 4: Means
mean(mddExprData_quantileLog2[,1])
expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names
## MDS of subjects
#d<-dist(expr_SxG) # Euclidean metric
mddCorr<-cor(t(expr_SxG)) # distance based on correlation
d <- sqrt(1-mddCorr)
mdd.mds <- cmdscale(d, k = 2)
x <- mdd.mds[,1]
y <- mdd.mds[,2]
mdd.mds.df <- data.frame(mdd.mds)
colnames(mdd.mds.df) <- c("dim1","dim2")
if (!require("ggplot2"))
BiocManager::install("ggplot2")
library(ggplot2)
p <- ggplot() # initialize empty ggplot object
p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
print(p)
expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names
## MDS of subjects
#d<-dist(expr_SxG) # Euclidean metric
mddCorr<-cor(t(expr_SxG)) # distance based on correlation
d <- sqrt(1-mddCorr)
mdd.mds <- cmdscale(d, k = 2)
x <- mdd.mds[,1]
y <- mdd.mds[,2]
mdd.mds.df <- data.frame(mdd.mds)
colnames(mdd.mds.df) <- c("dim1","dim2")
if (!require("ggplot2"))
BiocManager::install("ggplot2")
library(ggplot2)
p <- ggplot() # initialize empty ggplot object
p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
print(p)
dim(mdd.mds)
dim(mdd.mds.df)
library(umap)
mddTree = hclust(as.dist(d))
mddTree$labels <- mddPheno
plot(mddTree)
num.clust <- 5
mddCuts <- cutree(mddTree,k=num.clust)
table(names(mddCuts),mddCuts)
if (!require("dendextend")) install.packages("dendextend")
library(dendextend)
if (!require("dendextend")) install.packages("dendextend")
library(dendextend)
## 4: UMAP
if (!require("umap")) install.packages("umap")
library(umap)
## 2: hierarchical cluster of subjects
mddTree = hclust(as.dist(d))
mddTree$labels <- mddPheno
plot(mddTree)
## 2: hierarchical cluster of subjects
mddTree = hclust(as.dist(d))
mddTree$labels <- mddPheno
plot(mddTree)
table(mddTree)
num.clust <- 5
mddCuts <- cutree(mddTree,k=num.clust)
table(names(mddCuts),mddCuts)
?table
prop.table(names(mddCuts),mddCuts)
mddCutstable <- table(names(mddCuts),mddCuts)
prop.table(mddCutstable)
prop.table(mddCutstable, margin=1)
prop.table(mddCutstable, margin=2)
if (!require("dendextend")) install.packages("dendextend")
library(dendextend)
dend <- as.dendrogram(mddTree)
dend=color_labels(dend, k=num.clust)
#dend <- dend %>% color_branches(k = 4)
plot(dend) # selective coloring of branches AND labels
## 3: Clusters
if (!require("WGCNA"))
BiocManager::install("WGCNA")
library(WGCNA)
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
dynamicMods = cutreeDynamic(dendro = mddTree, distM = d,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = 2)
mddColors = labels2colors(dynamicMods)
table(mddColors)
table(mddColors,names(mddCuts))
plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters",
dendroLabels = NULL, # hang = -1,
addGuide = TRUE, #guideHang = 0.05,
main = "Clustering with WGCNA")
mddColorstable <- table(mddColors,names(mddCuts))
prop.table(mddColorstable)
prop.table(mddColorstable, margin = 1)
## 4: UMAP
if (!require("umap")) install.packages("umap")
library(umap)
# change umap config parameters
custom.config = umap.defaults
custom.config$random_state = 123
custom.config$n_epochs = 500
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
#colors[dats[,ncol(dats)]==1]="red"
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
colors[dats[,ncol(dats)]==1]="red"
# colors[dats[,ncol(dats)]==1]="red"
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
# colors[dats[,ncol(dats)]==1]="red"
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
legend("bottomleft", legend = c("class -1","class +1"),
col=c("black","red"), pch=19)
# colors[dats[,ncol(dats)]==1]="red"
colors[expr_SxG[,ncol(expr_SxG)]==1]="red"
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
legend("bottomleft", legend = c("class -1","class +1"),
col=c("black","red"), pch=19)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
# colors[dats[,ncol(dats)]==1]="red"
colors = rep("red",ncol(expr_SxG))
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG), "red",ncol(expr_SxG))
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
nrow(expr_SxG)
obs_umap
obs_umap$data
obs_umap
obs_umap$layout
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
# colors[dats[,ncol(dats)]==1]="red"
colors[obs_umap[,ncol(obs_umap)]>0]="red"
# colors[dats[,ncol(dats)]==1]="red"
colors[obs_umap$layout[,nrow(obs_umap$layout)]>0]="red"
# colors[dats[,ncol(dats)]==1]="red"
colors[obs_umap[,nrow(obs_umap$layout)]>0]="red"
obs_umap$layout
# colors[dats[,ncol(dats)]==1]="red"
colors[obs_umap$layout[,1]>0]="red"
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
obs_umap
obs_umap$knn
obs_umap$layout
obs_umap$layout[0]
obs_umap$layout[1]
obs_umap$layout$name
obs_umap$layout$names
obs_umap$layout
obs_umap$layout$HC
obs_umap
obs_umap$config
obs_umap$data
obs_umap
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
legend("bottomleft", legend = c("class -1","class +1"),
col=c("black","red"), pch=19)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
plot(obs_umap$layout, #col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
rownames(expr_SxG)
labels2colors(obs_umap)
labels2colors(as.df(obs_umap))
labels2colors(as.dataframe(obs_umap))
rownames(expr_SxG)
colors = rownames(expr_SxG)
colors
sapply(colors, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
sapply(colors[i], ifelse(grep(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red"))
colors <- sapply(expr_SxG, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")))
colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)) "black", "red"))
colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"})
colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE)), "black", "red"})
colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
?sapply
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), MDD, fixed=TRUE), "black", "red")})
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD"", fixed=TRUE), "black", "red")})
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
grep(rownames(expr_SxG), "MDD")
grep(rownames(expr_SxG), "HC")
grep(rownames(expr_SxG), "HC", fixed=TRUE)
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
rownames(expr_SxG[1])
rownames(expr_SxG[1][1])
rownames(expr_SxG[1],[1])
rownames(expr_SxG[1,1])
rownames(expr_SxG[2,2])
rownames(expr_SxG[2])
rownames(expr_SxG)[1]
rownames(expr_SxG)[2]
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
colors
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
colors
{ifelse(grep(x,
"MDD", fixed=TRUE),
"black", "red")})
# colors[dats[,ncol(dats)]==1]="red"
colors <- sapply(expr_SxG, function(x)
{ifelse(
grep(x, "MDD", fixed=TRUE),
"black", "red")
}
)
colors
ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
)
colors <- ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
)
colors
colors <- ifelse(grepl(expr_SxG, "MDD", fixed=TRUE), "black", "red")
colors
# colors[dats[,ncol(dats)]==1]="red"
colors <- sapply(expr_SxG, function(x)
{ifelse(
grepl(x, "MDD", fixed=TRUE),
"black", "red")
}
)
colors
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
{ifelse(
grepl(expr_SxG[x], "MDD", fixed=TRUE),
"black", "red")
}
expr_SxG
{ifelse(
grepl(rownames(x), "MDD", fixed=TRUE),
"black", "red")
}
{ifelse(
grepl(rownames(SxG)[x], "MDD", fixed=TRUE),
"black", "red")
}
{ifelse(
grepl(rownames(expr_SxG)[x], "MDD", fixed=TRUE),
"black", "red")
}
for (i in numrows(expr_SxG)) {
colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
}
for (i in nrow(expr_SxG)) {
colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
}
expr_SxG
expr_SxG[1]
for (i in nrow(expr_SxG)) {
colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
}
colors
colors[i] <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
colors
expr_SxG
expr_SxG[1][1]
expr_SxG[1]
expr_SxG[2]
expr_SxG[1]
expr_SxG[1,1]
expr_SxG[0,1]
expr_SxG[0,0]
expr_SxG[1,0]
expr_SxG[1,2]
expr_SxG[3,2]$name
class(expr_SxG[3,2])
class(expr_SxG)
expr_SxG[3,2]$label
expr_SxG[3,2]$labels
labels(expr_SxG)
labels(expr_SxG[1])
labels(expr_SxG[1,1])
labels(expr_SxG[1,2])
labels(expr_SxG[3,2])
labels(expr_SxG[1,])
labels(expr_SxG[,1])
colnames(expr_SxG)
rownames(expr_SxG)
rownames(expr_SxG[1])
rownames(expr_SxG[1])[1]
rownames(expr_SxG[1])[2]
rownames(expr_SxG[1])[31]
for (i in nrow(expr_SxG)) {
colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
}
colors
colors <- list()
for (i in nrow(expr_SxG)) {
colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
}
colors
rownames(expr_SxG[1])[31]
grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
grepl(rownames(expr_SxG)[31], MDD, fixed=TRUE)
grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
grepl(rownames(expr_SxG)[31], "MDD")
class(rownames(expr_SxG)[31])
grepl("MDD", rownames(expr_SxG)[31])
colors <- list()
for (i in nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
colors
grepl("MDD", rownames(expr_SxG)[157])
rownames(expr_SxG)[157]
rownames(expr_SxG)[1565]
rownames(expr_SxG)[156]
ifelse(grepl("MDD", rownames(expr_SxG)[156]), "black", "red")
ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
ifelse(grepl("MDD", rownames(expr_SxG)[1]), "black", "red")
colors[1]
colors[2]
colors[1,1]
colors[,1]
colors[1,]
nrow(expr_SxG)
ifelse(grepl("MDD", rownames(expr_SxG)[2]), "black", "red")
ifelse(grepl("MDD", rownames(expr_SxG)[3]), "black", "red")
ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
for i in nrow(expr_SxG){ print(i) {
for i in nrow(expr_SxG){ print(i) {
for (i in nrow(expr_SxG)){ print(i) {
for (i in nrow(expr_SxG)){ print(i) }
for (i in 1:nrow(expr_SxG)){ print(i) }
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
colors
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
colors <- list()
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
class(colors)
dim(colors)
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
colors
colors[157]
colors[157][1]
class(colors)
?plot
?plot
class(sample(letters[1:3]),10)
class(sample(letters[1:3], 10, replace=TRUE))
colors
dim
?dim
dim(colors)
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
class(colors)
colors
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
colors
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
# change umap config parameters
custom.config = umap.defaults
custom.config$random_state = 123
custom.config$n_epochs = 500
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
legend("bottomleft", legend = c("class -1","class +1"),
col=c("black","red"), pch=19)
custom.config = umap.defaults
custom.config$random_state = 123
custom.config$n_epochs = 500
# umap to cluster observations
obs_umap = umap(expr_SxG, config=custom.config)
#add colors for MDD/HC
colors = rep("black",nrow(expr_SxG))
for (i in 1:nrow(expr_SxG)) {
colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
}
dim(obs_umap$layout)
plot(obs_umap$layout, col=colors,
main="umap of observations", xlab="umap dim1", ylab="umap dim2")
legend("bottomleft", legend = c("MDD","HC"),
col=c("black","red"), pch=19)
data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
mean(log2(sense.filtered.cpm[,1]))),
Mode = c(getmode(sense.filtered.cpm[,1]),
getmode(log2(sense.filtered.cpm[,1]))),
Median = c(median(sense.filtered.cpm[,1]),
median(log2(sense.filtered.cpm[,1])))
)
rownames(data) = c("Original", "Log2 Transformed")
data