Finalizing Part C, generating report

2022-09-23 13:32:02 -05:00 · 2022-09-23 13:32:02 -05:00 · 2f888ff65b
commit 2f888ff65b
parent d7e2ce00f4
6 changed files with 557 additions and 468 deletions
--- a/.Rhistory
+++ b/.Rhistory
@ -1,426 +1,4 @@
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.breaks[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.breaks[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 #g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.breaks <- g.hist$breaks # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.breaks[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 alpha.LM
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 #plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
 plot(yeast)
 hist(yeast)
 hist(g.vec)
 g.pois
 g.mean
 alpha.LM
 alpha.ML
 degree(g)
 sort(degree(g))
 sort(degree(g),decreasing=FALSE)
 sort(degree(g),decreasing=F)
 sort(degree(g),decreasing=false)
 sort(degree(g), decreasing = TRUE)
 head(sort(degree(g), decreasing = TRUE))
 stddev(degree(g))
 sd(degree(g))
 tail(sort(degree(g), decreasing = TRUE))
 plot(log(g.breaks.clean), log(g.probs.clean))
 # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
 # Degree Distribution
 # Professor: Dr. McKinney, Spring 2022
 # Noah Schrick - 1492657
 library(igraph)
 library(igraphdata)
 data(yeast)
 g <- yeast
 g.netname <- "Yeast"
 ################# Set up Work #################
 g.vec <- degree(g)
 g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
 " Network"))
 legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
 "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
 "#006CD1", "#E66100", "#D35FB7"))
 g.mean <- mean(g.vec)
 g.seq <- 0:max(g.vec) # x-axis
 ################# Guessing Alpha #################
 alpha.guess <- 1.5
 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
 ################# Poisson #################
 g.pois <- dpois(g.seq, g.mean, log=F)
 lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
 ################# Linear model: Least-Squares Fit #################
 g.breaks <- g.hist$breaks[-c(1)] # remove 0
 g.probs <- g.hist$density[-1] # make lengths match
 # Need to clean up probabilities that are 0
 nz.probs.mask <- g.probs!=0
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 plot(log(g.breaks.clean), log(g.probs.clean))
 g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
 summary(g.fit)
 alpha.LM <- coef(g.fit)[2]
 lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
 ################# Max-Log-Likelihood #################
 n <- length(g.breaks.clean)
 kmin <- g.breaks.clean[1]
 alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
 alpha.ML
 lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
 plot(log(g.breaks.clean), log(g.probs.clean))
 g.breaks.clean <- g.breaks[nz.probs.mask]
 g.probs.clean <- g.probs[nz.probs.mask]
 plot(log(g.breaks.clean), log(g.probs.clean))
 # load gene expression data
 load("sense.filtered.cpm.Rdata")
 ## Set Working Directory to file directory - RStudio approach
 setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
 # load gene expression data
 load("sense.filtered.cpm.Rdata")
 dim(sense.filtered.cpm)
 colnames(sense.filtered.cpm)
 length(sense.filtered.cpm)
 ## Set Working Directory to file directory - RStudio approach
 setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
 ## 2: Loading Demographic Data
 subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE)
 dim(subject.attrs)  # 160 subjects x 40 attributes
 colnames(subject.attrs)  # interested in X (sample ids) and Diag (diagnosis)
 subject.attrs$X
 subject.attrs$Diag
 dim(sense.filtered.cpm)
 colnames(sense.filtered.cpm)
 rownames(sense.filtered.cpm)
 # Matching gene expression samples with their diagnosis
 if (!require("dplyr")) install.packages("dplyr")
 # Matching gene expression samples with their diagnosis
 if (!require("dplyr")) install.packages("dplyr")
 # create a phenotype vector
 # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
 #   intersect %in% with the RNA-Seq data
 phenos.df <- subject.attrs %>%
 filter(X %in% colnames(sense.filtered.cpm)) %>%
 dplyr::select(X, Diag)
 # Matching gene expression samples with their diagnosis
 if (!require("dplyr")) install.packages("dplyr")
 # create a phenotype vector
 # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
 #   intersect %in% with the RNA-Seq data
 phenos.df <- subject.attrs %>%
 filter(X %in% colnames(sense.filtered.cpm)) %>%
 dplyr::select(X, Diag)
 library(dplyr)
 # create a phenotype vector
 # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
@ -437,59 +15,17 @@ summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
 # raw cpm boxplots and histogram of one sample
 boxplot(sense.filtered.cpm,range=0,
 ylab="raw probe intensity", main="Raw", names=mddPheno)
 hist(sense.filtered.cpm[,1], freq=F, ylab="density",
 xlab="raw probe intensity", main="Raw Data Density for Sample 1")
 # log2 transformed boxplots and histogram
 boxplot(log2(sense.filtered.cpm), range=0,
 ylab="log2 intensity", main="Log2 Transformed", names=mddPheno)
 hist(sense.filtered.cpm[,1], freq=F, ylab="density",
 xlab="raw probe intensity", main="Raw Data Density for Sample 1")
 hist(log2(sense.filtered.cpm[,1]), freq=F, ylab="density",
 xlab="log2 probe intensity", main="log2 Data Density for Sample 1")
 median(sense.filtered.cpm[,1])
 mode(sense.filtered.cpm[,1])
 average(sense.filtered.cpm[,1])
 mean(sense.filtered.cpm[,1])
 getmode <- function(v) {
 uniqv <-unique(v)
 uniqv[which.max(tabulate(match(v, uniqv)))]
 }
 getmode(sense.filtered.cpm[,1])
 median(sense.filtered.cpm[,1])
 mean(sense.filtered.cpm[,1])
 getmode(sense.filtered.cpm)
 median(sense.filtered.cpm)
 mean(sense.filtered.cpm)
 getmode(log2(sense.filtered.cpm))
 median(log2(sense.filtered.cpm))
 mean(log2(sense.filtered.cpm))
 log2(sense.filtered.cpm)
 log2(sense.filtered.cpm[,1])
 getmode(log2(sense.filtered.cpm[,1]))
 median(log2(sense.filtered.cpm[,1]))
 mean(log2(sense.filtered.cpm[,1]))
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
 getmode(log2(sense.filtered.cpm[,1]))),
 Median = c(median(sense.filtered.cpm[,1])),
 )
 data
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
 getmode(log2(sense.filtered.cpm[,1]))),
 Median = c(median(sense.filtered.cpm[,1])),
 )
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
 getmode(log2(sense.filtered.cpm[,1]))),
 Median = c(median(sense.filtered.cpm[,1]))
 )
 data
 rownames(data) = c("Original", "Log2 Transformed")
 data
 table(data)
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
@ -510,3 +46,467 @@ library(preprocessCore)    # replace with custom function?
 mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm)
 boxplot(mddExprData_quantile,range=0,ylab="raw intensity", main="Quantile Normalized")
 sapply(mddExprData_quantile, function(x) quantile(x, probs = seq(0, 1, 1/4)))
 quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
 quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))
 length(mddExprData_quantile)
 sapply(mddExprData_quantile, function(x) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
 head(mddExprData_quantile)
 head(mddExprData_quantile)
 head(mddExprData_quantile[,3])
 head(mddExprData_quantile[,1-3])
 head(mddExprData_quantile)
 ### 3: Log2 on quantile normalized data
 mddExprData_quantileLog2 <- log2(mddExprData_quantile)
 # add phenotype names to matrix
 colnames(mddExprData_quantileLog2) <- mddPheno
 boxplot(mddExprData_quantileLog2,range=0,
 ylab="log2 intensity", main="Quantile Normalized Log2")
 hist(log2(mddExprData_quantileLog2[,1]), freq=F,
 ylab="density", xlab="log2 probe intensity",
 main="log2 Quantile Normalized for Sample 1")
 ## 4: Means
 mean(mddExprData_quantileLog2[,1])
 colMeans(mddExprData_quantileLog2)
 ## 4: Means
 mean(mddExprData_quantileLog2[,1])
 expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
 colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
 ## MDS of subjects
 #d<-dist(expr_SxG)         # Euclidean metric
 mddCorr<-cor(t(expr_SxG))  # distance based on correlation
 d <- sqrt(1-mddCorr)
 mdd.mds <- cmdscale(d, k = 2)
 x <- mdd.mds[,1]
 y <- mdd.mds[,2]
 mdd.mds.df <- data.frame(mdd.mds)
 colnames(mdd.mds.df) <- c("dim1","dim2")
 if (!require("ggplot2"))
 BiocManager::install("ggplot2")
 library(ggplot2)
 p <- ggplot() # initialize empty ggplot object
 p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
 p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
 print(p)
 expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
 colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
 ## MDS of subjects
 #d<-dist(expr_SxG)         # Euclidean metric
 mddCorr<-cor(t(expr_SxG))  # distance based on correlation
 d <- sqrt(1-mddCorr)
 mdd.mds <- cmdscale(d, k = 2)
 x <- mdd.mds[,1]
 y <- mdd.mds[,2]
 mdd.mds.df <- data.frame(mdd.mds)
 colnames(mdd.mds.df) <- c("dim1","dim2")
 if (!require("ggplot2"))
 BiocManager::install("ggplot2")
 library(ggplot2)
 p <- ggplot() # initialize empty ggplot object
 p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
 p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
 print(p)
 dim(mdd.mds)
 dim(mdd.mds.df)
 library(umap)
 mddTree = hclust(as.dist(d))
 mddTree$labels <- mddPheno
 plot(mddTree)
 num.clust <- 5
 mddCuts <- cutree(mddTree,k=num.clust)
 table(names(mddCuts),mddCuts)
 if (!require("dendextend")) install.packages("dendextend")
 library(dendextend)
 if (!require("dendextend")) install.packages("dendextend")
 library(dendextend)
 ## 4: UMAP
 if (!require("umap")) install.packages("umap")
 library(umap)
 ## 2: hierarchical cluster of subjects
 mddTree = hclust(as.dist(d))
 mddTree$labels <- mddPheno
 plot(mddTree)
 ## 2: hierarchical cluster of subjects
 mddTree = hclust(as.dist(d))
 mddTree$labels <- mddPheno
 plot(mddTree)
 table(mddTree)
 num.clust <- 5
 mddCuts <- cutree(mddTree,k=num.clust)
 table(names(mddCuts),mddCuts)
 ?table
 prop.table(names(mddCuts),mddCuts)
 mddCutstable <- table(names(mddCuts),mddCuts)
 prop.table(mddCutstable)
 prop.table(mddCutstable, margin=1)
 prop.table(mddCutstable, margin=2)
 if (!require("dendextend")) install.packages("dendextend")
 library(dendextend)
 dend <- as.dendrogram(mddTree)
 dend=color_labels(dend, k=num.clust)
 #dend <- dend %>% color_branches(k = 4)
 plot(dend) # selective coloring of branches AND labels
 ## 3: Clusters
 if (!require("WGCNA"))
 BiocManager::install("WGCNA")
 library(WGCNA)
 # Plot the dendrogram and colors underneath
 sizeGrWindow(8,6)
 dynamicMods = cutreeDynamic(dendro = mddTree, distM = d,
 deepSplit = 2, pamRespectsDendro = FALSE,
 minClusterSize = 2)
 mddColors = labels2colors(dynamicMods)
 table(mddColors)
 table(mddColors,names(mddCuts))
 plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters",
 dendroLabels = NULL, # hang = -1,
 addGuide = TRUE, #guideHang = 0.05,
 main = "Clustering with WGCNA")
 mddColorstable <- table(mddColors,names(mddCuts))
 prop.table(mddColorstable)
 prop.table(mddColorstable, margin = 1)
 ## 4: UMAP
 if (!require("umap")) install.packages("umap")
 library(umap)
 # change umap config parameters
 custom.config = umap.defaults
 custom.config$random_state = 123
 custom.config$n_epochs = 500
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 #colors[dats[,ncol(dats)]==1]="red"
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 colors[dats[,ncol(dats)]==1]="red"
 # colors[dats[,ncol(dats)]==1]="red"
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 # colors[dats[,ncol(dats)]==1]="red"
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("class -1","class +1"),
 col=c("black","red"), pch=19)
 # colors[dats[,ncol(dats)]==1]="red"
 colors[expr_SxG[,ncol(expr_SxG)]==1]="red"
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("class -1","class +1"),
 col=c("black","red"), pch=19)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 # colors[dats[,ncol(dats)]==1]="red"
 colors = rep("red",ncol(expr_SxG))
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG), "red",ncol(expr_SxG))
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 nrow(expr_SxG)
 obs_umap
 obs_umap$data
 obs_umap
 obs_umap$layout
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 # colors[dats[,ncol(dats)]==1]="red"
 colors[obs_umap[,ncol(obs_umap)]>0]="red"
 # colors[dats[,ncol(dats)]==1]="red"
 colors[obs_umap$layout[,nrow(obs_umap$layout)]>0]="red"
 # colors[dats[,ncol(dats)]==1]="red"
 colors[obs_umap[,nrow(obs_umap$layout)]>0]="red"
 obs_umap$layout
 # colors[dats[,ncol(dats)]==1]="red"
 colors[obs_umap$layout[,1]>0]="red"
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 obs_umap
 obs_umap$knn
 obs_umap$layout
 obs_umap$layout[0]
 obs_umap$layout[1]
 obs_umap$layout$name
 obs_umap$layout$names
 obs_umap$layout
 obs_umap$layout$HC
 obs_umap
 obs_umap$config
 obs_umap$data
 obs_umap
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("class -1","class +1"),
 col=c("black","red"), pch=19)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 plot(obs_umap$layout, #col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 rownames(expr_SxG)
 labels2colors(obs_umap)
 labels2colors(as.df(obs_umap))
 labels2colors(as.dataframe(obs_umap))
 rownames(expr_SxG)
 colors = rownames(expr_SxG)
 colors
 sapply(colors, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
 sapply(colors[i], ifelse(grep(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red"))
 colors <- sapply(expr_SxG, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
 colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")))
 colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
 colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)) "black", "red"))
 colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
 colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"})
 colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE)), "black", "red"})
 colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
 ?sapply
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), MDD, fixed=TRUE), "black", "red")})
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD"", fixed=TRUE), "black", "red")})
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
 grep(rownames(expr_SxG), "MDD")
 grep(rownames(expr_SxG), "HC")
 grep(rownames(expr_SxG), "HC", fixed=TRUE)
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
 rownames(expr_SxG[1])
 rownames(expr_SxG[1][1])
 rownames(expr_SxG[1],[1])
 rownames(expr_SxG[1,1])
 rownames(expr_SxG[2,2])
 rownames(expr_SxG[2])
 rownames(expr_SxG)[1]
 rownames(expr_SxG)[2]
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
 colors
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
 colors
 {ifelse(grep(x,
 "MDD", fixed=TRUE),
 "black", "red")})
 # colors[dats[,ncol(dats)]==1]="red"
 colors <- sapply(expr_SxG, function(x)
 {ifelse(
 grep(x, "MDD", fixed=TRUE),
 "black", "red")
 }
 )
 colors
 ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
 )
 colors <- ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
 )
 colors
 colors <- ifelse(grepl(expr_SxG, "MDD", fixed=TRUE), "black", "red")
 colors
 # colors[dats[,ncol(dats)]==1]="red"
 colors <- sapply(expr_SxG, function(x)
 {ifelse(
 grepl(x, "MDD", fixed=TRUE),
 "black", "red")
 }
 )
 colors
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 {ifelse(
 grepl(expr_SxG[x], "MDD", fixed=TRUE),
 "black", "red")
 }
 expr_SxG
 {ifelse(
 grepl(rownames(x), "MDD", fixed=TRUE),
 "black", "red")
 }
 {ifelse(
 grepl(rownames(SxG)[x], "MDD", fixed=TRUE),
 "black", "red")
 }
 {ifelse(
 grepl(rownames(expr_SxG)[x], "MDD", fixed=TRUE),
 "black", "red")
 }
 for (i in numrows(expr_SxG)) {
 colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
 }
 for (i in nrow(expr_SxG)) {
 colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
 }
 expr_SxG
 expr_SxG[1]
 for (i in nrow(expr_SxG)) {
 colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
 }
 colors
 colors[i] <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
 colors
 expr_SxG
 expr_SxG[1][1]
 expr_SxG[1]
 expr_SxG[2]
 expr_SxG[1]
 expr_SxG[1,1]
 expr_SxG[0,1]
 expr_SxG[0,0]
 expr_SxG[1,0]
 expr_SxG[1,2]
 expr_SxG[3,2]$name
 class(expr_SxG[3,2])
 class(expr_SxG)
 expr_SxG[3,2]$label
 expr_SxG[3,2]$labels
 labels(expr_SxG)
 labels(expr_SxG[1])
 labels(expr_SxG[1,1])
 labels(expr_SxG[1,2])
 labels(expr_SxG[3,2])
 labels(expr_SxG[1,])
 labels(expr_SxG[,1])
 colnames(expr_SxG)
 rownames(expr_SxG)
 rownames(expr_SxG[1])
 rownames(expr_SxG[1])[1]
 rownames(expr_SxG[1])[2]
 rownames(expr_SxG[1])[31]
 for (i in nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
 }
 colors
 colors <- list()
 for (i in nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
 }
 colors
 rownames(expr_SxG[1])[31]
 grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
 grepl(rownames(expr_SxG)[31], MDD, fixed=TRUE)
 grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
 grepl(rownames(expr_SxG)[31], "MDD")
 class(rownames(expr_SxG)[31])
 grepl("MDD", rownames(expr_SxG)[31])
 colors <- list()
 for (i in nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 colors
 grepl("MDD", rownames(expr_SxG)[157])
 rownames(expr_SxG)[157]
 rownames(expr_SxG)[1565]
 rownames(expr_SxG)[156]
 ifelse(grepl("MDD", rownames(expr_SxG)[156]), "black", "red")
 ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
 ifelse(grepl("MDD", rownames(expr_SxG)[1]), "black", "red")
 colors[1]
 colors[2]
 colors[1,1]
 colors[,1]
 colors[1,]
 nrow(expr_SxG)
 ifelse(grepl("MDD", rownames(expr_SxG)[2]), "black", "red")
 ifelse(grepl("MDD", rownames(expr_SxG)[3]), "black", "red")
 ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
 for i in nrow(expr_SxG){ print(i) {
 for i in nrow(expr_SxG){ print(i) {
 for (i in nrow(expr_SxG)){ print(i) {
 for (i in nrow(expr_SxG)){ print(i) }
 for (i in 1:nrow(expr_SxG)){ print(i) }
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 colors
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 colors <- list()
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 class(colors)
 dim(colors)
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 colors
 colors[157]
 colors[157][1]
 class(colors)
 ?plot
 ?plot
 class(sample(letters[1:3]),10)
 class(sample(letters[1:3], 10, replace=TRUE))
 colors
 dim
 ?dim
 dim(colors)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 class(colors)
 colors
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 colors
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 # change umap config parameters
 custom.config = umap.defaults
 custom.config$random_state = 123
 custom.config$n_epochs = 500
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("class -1","class +1"),
 col=c("black","red"), pch=19)
 custom.config = umap.defaults
 custom.config$random_state = 123
 custom.config$n_epochs = 500
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 for (i in 1:nrow(expr_SxG)) {
 colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors,
 main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("MDD","HC"),
 col=c("black","red"), pch=19)
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
 getmode(log2(sense.filtered.cpm[,1]))),
 Median = c(median(sense.filtered.cpm[,1]),
 median(log2(sense.filtered.cpm[,1])))
 )
 rownames(data) = c("Original", "Log2 Transformed")
 data
--- a/.~lock.lab3_expression.docx#
+++ b/.~lock.lab3_expression.docx#
@ -1 +0,0 @@
 ,noah,NovaArchSys,22.09.2022 19:18,file:///home/noah/.config/libreoffice/4;
--- a/Schrick-Noah_CS-6643_Lab-3.R
+++ b/Schrick-Noah_CS-6643_Lab-3.R
@ -60,7 +60,8 @@ data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
                            mean(log2(sense.filtered.cpm[,1]))),
                   Mode = c(getmode(sense.filtered.cpm[,1]),
                            getmode(log2(sense.filtered.cpm[,1]))),
-                   Median = c(median(sense.filtered.cpm[,1]))
+                   Median = c(median(sense.filtered.cpm[,1]),
                              median(log2(sense.filtered.cpm[,1])))
                   )
 rownames(data) = c("Original", "Log2 Transformed")
 data
@ -97,3 +98,92 @@ hist(log2(mddExprData_quantileLog2[,1]), freq=F,
 ## 4: Means
 mean(mddExprData_quantileLog2[,1])
 colMeans(mddExprData_quantileLog2)
 #### Part C: Exploratory clustering of microarray data
 ## 1: MDS
 # transpose data matrix and convert to data frame
 # ggplot wants data frame and subjects as rows
 expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
 colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
 # MDS of subjects 
 #d<-dist(expr_SxG)         # Euclidean metric
 mddCorr<-cor(t(expr_SxG))  # distance based on correlation
 d <- sqrt(1-mddCorr)
 mdd.mds <- cmdscale(d, k = 2)
 x <- mdd.mds[,1]
 y <- mdd.mds[,2]
 mdd.mds.df <- data.frame(mdd.mds)
 colnames(mdd.mds.df) <- c("dim1","dim2")
 if (!require("ggplot2"))
  BiocManager::install("ggplot2")
 library(ggplot2)
 p <- ggplot() # initialize empty ggplot object
 p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
 p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
 print(p)
 dim(mdd.mds)
 ## 2: hierarchical cluster of subjects
 mddTree = hclust(as.dist(d))
 mddTree$labels <- mddPheno
 plot(mddTree)
 num.clust <- 5
 mddCuts <- cutree(mddTree,k=num.clust)
 mddCutstable <- table(names(mddCuts),mddCuts)
 prop.table(mddCutstable, margin=2)
 if (!require("dendextend")) install.packages("dendextend")
 library(dendextend)
 dend <- as.dendrogram(mddTree)
 dend=color_labels(dend, k=num.clust)
 #dend <- dend %>% color_branches(k = 4) 
 plot(dend) # selective coloring of branches AND labels 
 ## 3: Clusters
 if (!require("WGCNA"))
  BiocManager::install("WGCNA")
 library(WGCNA)
 # Plot the dendrogram and colors underneath
 sizeGrWindow(8,6)
 dynamicMods = cutreeDynamic(dendro = mddTree, distM = d,
                            deepSplit = 2, pamRespectsDendro = FALSE,
                            minClusterSize = 2)
 mddColors = labels2colors(dynamicMods)
 table(mddColors)
 mddColorstable <- table(mddColors,names(mddCuts))
 prop.table(mddColorstable, margin = 1)
 plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters",
                    dendroLabels = NULL, # hang = -1,
                    addGuide = TRUE, #guideHang = 0.05,
                    main = "Clustering with WGCNA")
 ## 4: UMAP
 if (!require("umap")) install.packages("umap")
 library(umap)
 # change umap config parameters
 custom.config = umap.defaults
 custom.config$random_state = 123
 custom.config$n_epochs = 500
 # umap to cluster observations
 obs_umap = umap(expr_SxG, config=custom.config)
 #add colors for MDD/HC
 colors = rep("black",nrow(expr_SxG))
 for (i in 1:nrow(expr_SxG)) {
  colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
 }
 dim(obs_umap$layout)
 plot(obs_umap$layout, col=colors, 
     main="umap of observations", xlab="umap dim1", ylab="umap dim2")
 legend("bottomleft", legend = c("MDD","HC"), 
       col=c("black","red"), pch=19)
--- a/Schrick-Noah_CS-6643_Lab-3.docx
+++ b/Schrick-Noah_CS-6643_Lab-3.docx
--- a/Schrick-Noah_CS-6643_Lab-3.pdf
+++ b/Schrick-Noah_CS-6643_Lab-3.pdf
--- a/lab3_expression.docx
+++ b/lab3_expression.docx
		`@ -1 +0,0 @@`
			`,noah,NovaArchSys,22.09.2022 19:18,file:///home/noah/.config/libreoffice/4;`