if (!require("dplyr")) install.packages("dplyr") library(dplyr) # create a phenotype vector # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that # intersect %in% with the RNA-Seq data phenos.df <- subject.attrs %>% filter(X %in% colnames(sense.filtered.cpm)) %>% dplyr::select(X, Diag) colnames(phenos.df) # $Diag is mdd diagnosis # grab Diag column and convert character to factor mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control #### Part B: Normalization ## 1: log2 transformation # raw cpm boxplots and histogram of one sample boxplot(sense.filtered.cpm,range=0, ylab="raw probe intensity", main="Raw", names=mddPheno) hist(sense.filtered.cpm[,1], freq=F, ylab="density", xlab="raw probe intensity", main="Raw Data Density for Sample 1") # log2 transformed boxplots and histogram boxplot(log2(sense.filtered.cpm), range=0, ylab="log2 intensity", main="Log2 Transformed", names=mddPheno) hist(log2(sense.filtered.cpm[,1]), freq=F, ylab="density", xlab="log2 probe intensity", main="log2 Data Density for Sample 1") getmode <- function(v) { uniqv <-unique(v) uniqv[which.max(tabulate(match(v, uniqv)))] } data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), mean(log2(sense.filtered.cpm[,1]))), Mode = c(getmode(sense.filtered.cpm[,1]), getmode(log2(sense.filtered.cpm[,1]))), Median = c(median(sense.filtered.cpm[,1])) ) rownames(data) = c("Original", "Log2 Transformed") data ## 2: Quantile Normalization # install quantile normalize #install.packages("BiocManager") if (!require("BiocManager")) install.packages("BiocManager") library(BiocManager) if (!require("preprocessCore")) BiocManager::install("preprocessCore") library(preprocessCore) # replace with custom function? # apply quantile normalization mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm) boxplot(mddExprData_quantile,range=0,ylab="raw intensity", main="Quantile Normalized") sapply(mddExprData_quantile, function(x) quantile(x, probs = seq(0, 1, 1/4))) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)) length(mddExprData_quantile) sapply(mddExprData_quantile, function(x) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))) head(mddExprData_quantile) head(mddExprData_quantile) head(mddExprData_quantile[,3]) head(mddExprData_quantile[,1-3]) head(mddExprData_quantile) ### 3: Log2 on quantile normalized data mddExprData_quantileLog2 <- log2(mddExprData_quantile) # add phenotype names to matrix colnames(mddExprData_quantileLog2) <- mddPheno boxplot(mddExprData_quantileLog2,range=0, ylab="log2 intensity", main="Quantile Normalized Log2") hist(log2(mddExprData_quantileLog2[,1]), freq=F, ylab="density", xlab="log2 probe intensity", main="log2 Quantile Normalized for Sample 1") ## 4: Means mean(mddExprData_quantileLog2[,1]) colMeans(mddExprData_quantileLog2) ## 4: Means mean(mddExprData_quantileLog2[,1]) expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names ## MDS of subjects #d<-dist(expr_SxG) # Euclidean metric mddCorr<-cor(t(expr_SxG)) # distance based on correlation d <- sqrt(1-mddCorr) mdd.mds <- cmdscale(d, k = 2) x <- mdd.mds[,1] y <- mdd.mds[,2] mdd.mds.df <- data.frame(mdd.mds) colnames(mdd.mds.df) <- c("dim1","dim2") if (!require("ggplot2")) BiocManager::install("ggplot2") library(ggplot2) p <- ggplot() # initialize empty ggplot object p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3) p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2") print(p) expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names ## MDS of subjects #d<-dist(expr_SxG) # Euclidean metric mddCorr<-cor(t(expr_SxG)) # distance based on correlation d <- sqrt(1-mddCorr) mdd.mds <- cmdscale(d, k = 2) x <- mdd.mds[,1] y <- mdd.mds[,2] mdd.mds.df <- data.frame(mdd.mds) colnames(mdd.mds.df) <- c("dim1","dim2") if (!require("ggplot2")) BiocManager::install("ggplot2") library(ggplot2) p <- ggplot() # initialize empty ggplot object p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3) p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2") print(p) dim(mdd.mds) dim(mdd.mds.df) library(umap) mddTree = hclust(as.dist(d)) mddTree$labels <- mddPheno plot(mddTree) num.clust <- 5 mddCuts <- cutree(mddTree,k=num.clust) table(names(mddCuts),mddCuts) if (!require("dendextend")) install.packages("dendextend") library(dendextend) if (!require("dendextend")) install.packages("dendextend") library(dendextend) ## 4: UMAP if (!require("umap")) install.packages("umap") library(umap) ## 2: hierarchical cluster of subjects mddTree = hclust(as.dist(d)) mddTree$labels <- mddPheno plot(mddTree) ## 2: hierarchical cluster of subjects mddTree = hclust(as.dist(d)) mddTree$labels <- mddPheno plot(mddTree) table(mddTree) num.clust <- 5 mddCuts <- cutree(mddTree,k=num.clust) table(names(mddCuts),mddCuts) ?table prop.table(names(mddCuts),mddCuts) mddCutstable <- table(names(mddCuts),mddCuts) prop.table(mddCutstable) prop.table(mddCutstable, margin=1) prop.table(mddCutstable, margin=2) if (!require("dendextend")) install.packages("dendextend") library(dendextend) dend <- as.dendrogram(mddTree) dend=color_labels(dend, k=num.clust) #dend <- dend %>% color_branches(k = 4) plot(dend) # selective coloring of branches AND labels ## 3: Clusters if (!require("WGCNA")) BiocManager::install("WGCNA") library(WGCNA) # Plot the dendrogram and colors underneath sizeGrWindow(8,6) dynamicMods = cutreeDynamic(dendro = mddTree, distM = d, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = 2) mddColors = labels2colors(dynamicMods) table(mddColors) table(mddColors,names(mddCuts)) plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters", dendroLabels = NULL, # hang = -1, addGuide = TRUE, #guideHang = 0.05, main = "Clustering with WGCNA") mddColorstable <- table(mddColors,names(mddCuts)) prop.table(mddColorstable) prop.table(mddColorstable, margin = 1) ## 4: UMAP if (!require("umap")) install.packages("umap") library(umap) # change umap config parameters custom.config = umap.defaults custom.config$random_state = 123 custom.config$n_epochs = 500 # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) #colors[dats[,ncol(dats)]==1]="red" dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) colors[dats[,ncol(dats)]==1]="red" # colors[dats[,ncol(dats)]==1]="red" dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) # colors[dats[,ncol(dats)]==1]="red" dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") legend("bottomleft", legend = c("class -1","class +1"), col=c("black","red"), pch=19) # colors[dats[,ncol(dats)]==1]="red" colors[expr_SxG[,ncol(expr_SxG)]==1]="red" dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") legend("bottomleft", legend = c("class -1","class +1"), col=c("black","red"), pch=19) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") # colors[dats[,ncol(dats)]==1]="red" colors = rep("red",ncol(expr_SxG)) dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") #add colors for MDD/HC colors = rep("black",nrow(expr_SxG), "red",ncol(expr_SxG)) dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) nrow(expr_SxG) obs_umap obs_umap$data obs_umap obs_umap$layout #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) # colors[dats[,ncol(dats)]==1]="red" colors[obs_umap[,ncol(obs_umap)]>0]="red" # colors[dats[,ncol(dats)]==1]="red" colors[obs_umap$layout[,nrow(obs_umap$layout)]>0]="red" # colors[dats[,ncol(dats)]==1]="red" colors[obs_umap[,nrow(obs_umap$layout)]>0]="red" obs_umap$layout # colors[dats[,ncol(dats)]==1]="red" colors[obs_umap$layout[,1]>0]="red" dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") obs_umap obs_umap$knn obs_umap$layout obs_umap$layout[0] obs_umap$layout[1] obs_umap$layout$name obs_umap$layout$names obs_umap$layout obs_umap$layout$HC obs_umap obs_umap$config obs_umap$data obs_umap # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") legend("bottomleft", legend = c("class -1","class +1"), col=c("black","red"), pch=19) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") plot(obs_umap$layout, #col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") rownames(expr_SxG) labels2colors(obs_umap) labels2colors(as.df(obs_umap)) labels2colors(as.dataframe(obs_umap)) rownames(expr_SxG) colors = rownames(expr_SxG) colors sapply(colors, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")) sapply(colors[i], ifelse(grep(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")) colors <- sapply(expr_SxG, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")) colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))) colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red")) colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)) "black", "red")) colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red")) colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"}) colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE)), "black", "red"}) colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) ?sapply colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), MDD, fixed=TRUE), "black", "red")}) colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD"", fixed=TRUE), "black", "red")}) colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) grep(rownames(expr_SxG), "MDD") grep(rownames(expr_SxG), "HC") grep(rownames(expr_SxG), "HC", fixed=TRUE) colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) rownames(expr_SxG[1]) rownames(expr_SxG[1][1]) rownames(expr_SxG[1],[1]) rownames(expr_SxG[1,1]) rownames(expr_SxG[2,2]) rownames(expr_SxG[2]) rownames(expr_SxG)[1] rownames(expr_SxG)[2] colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")}) colors #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")}) colors {ifelse(grep(x, "MDD", fixed=TRUE), "black", "red")}) # colors[dats[,ncol(dats)]==1]="red" colors <- sapply(expr_SxG, function(x) {ifelse( grep(x, "MDD", fixed=TRUE), "black", "red") } ) colors ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red" ) colors <- ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red" ) colors colors <- ifelse(grepl(expr_SxG, "MDD", fixed=TRUE), "black", "red") colors # colors[dats[,ncol(dats)]==1]="red" colors <- sapply(expr_SxG, function(x) {ifelse( grepl(x, "MDD", fixed=TRUE), "black", "red") } ) colors dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") {ifelse( grepl(expr_SxG[x], "MDD", fixed=TRUE), "black", "red") } expr_SxG {ifelse( grepl(rownames(x), "MDD", fixed=TRUE), "black", "red") } {ifelse( grepl(rownames(SxG)[x], "MDD", fixed=TRUE), "black", "red") } {ifelse( grepl(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red") } for (i in numrows(expr_SxG)) { colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") } for (i in nrow(expr_SxG)) { colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") } expr_SxG expr_SxG[1] for (i in nrow(expr_SxG)) { colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") } colors colors[i] <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") colors expr_SxG expr_SxG[1][1] expr_SxG[1] expr_SxG[2] expr_SxG[1] expr_SxG[1,1] expr_SxG[0,1] expr_SxG[0,0] expr_SxG[1,0] expr_SxG[1,2] expr_SxG[3,2]$name class(expr_SxG[3,2]) class(expr_SxG) expr_SxG[3,2]$label expr_SxG[3,2]$labels labels(expr_SxG) labels(expr_SxG[1]) labels(expr_SxG[1,1]) labels(expr_SxG[1,2]) labels(expr_SxG[3,2]) labels(expr_SxG[1,]) labels(expr_SxG[,1]) colnames(expr_SxG) rownames(expr_SxG) rownames(expr_SxG[1]) rownames(expr_SxG[1])[1] rownames(expr_SxG[1])[2] rownames(expr_SxG[1])[31] for (i in nrow(expr_SxG)) { colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red") } colors colors <- list() for (i in nrow(expr_SxG)) { colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red") } colors rownames(expr_SxG[1])[31] grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE) grepl(rownames(expr_SxG)[31], MDD, fixed=TRUE) grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE) grepl(rownames(expr_SxG)[31], "MDD") class(rownames(expr_SxG)[31]) grepl("MDD", rownames(expr_SxG)[31]) colors <- list() for (i in nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } colors grepl("MDD", rownames(expr_SxG)[157]) rownames(expr_SxG)[157] rownames(expr_SxG)[1565] rownames(expr_SxG)[156] ifelse(grepl("MDD", rownames(expr_SxG)[156]), "black", "red") ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red") ifelse(grepl("MDD", rownames(expr_SxG)[1]), "black", "red") colors[1] colors[2] colors[1,1] colors[,1] colors[1,] nrow(expr_SxG) ifelse(grepl("MDD", rownames(expr_SxG)[2]), "black", "red") ifelse(grepl("MDD", rownames(expr_SxG)[3]), "black", "red") ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red") for i in nrow(expr_SxG){ print(i) { for i in nrow(expr_SxG){ print(i) { for (i in nrow(expr_SxG)){ print(i) { for (i in nrow(expr_SxG)){ print(i) } for (i in 1:nrow(expr_SxG)){ print(i) } for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } colors # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) colors <- list() for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") class(colors) dim(colors) for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } colors colors[157] colors[157][1] class(colors) ?plot ?plot class(sample(letters[1:3]),10) class(sample(letters[1:3], 10, replace=TRUE)) colors dim ?dim dim(colors) #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) class(colors) colors for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } colors plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") # change umap config parameters custom.config = umap.defaults custom.config$random_state = 123 custom.config$n_epochs = 500 # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") legend("bottomleft", legend = c("class -1","class +1"), col=c("black","red"), pch=19) custom.config = umap.defaults custom.config$random_state = 123 custom.config$n_epochs = 500 # umap to cluster observations obs_umap = umap(expr_SxG, config=custom.config) #add colors for MDD/HC colors = rep("black",nrow(expr_SxG)) for (i in 1:nrow(expr_SxG)) { colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") } dim(obs_umap$layout) plot(obs_umap$layout, col=colors, main="umap of observations", xlab="umap dim1", ylab="umap dim2") legend("bottomleft", legend = c("MDD","HC"), col=c("black","red"), pch=19) data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), mean(log2(sense.filtered.cpm[,1]))), Mode = c(getmode(sense.filtered.cpm[,1]), getmode(log2(sense.filtered.cpm[,1]))), Median = c(median(sense.filtered.cpm[,1]), median(log2(sense.filtered.cpm[,1]))) ) rownames(data) = c("Original", "Log2 Transformed") data