diff --git a/.Rhistory b/.Rhistory
index 82ef643..69af0ed 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,426 +1,4 @@
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.breaks[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.breaks[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-#g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.breaks <- g.hist$breaks # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.breaks[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-alpha.LM
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-#plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
-plot(yeast)
-hist(yeast)
-hist(g.vec)
-g.pois
-g.mean
-alpha.LM
-alpha.ML
-degree(g)
-sort(degree(g))
-sort(degree(g),decreasing=FALSE)
-sort(degree(g),decreasing=F)
-sort(degree(g),decreasing=false)
-sort(degree(g), decreasing = TRUE)
-head(sort(degree(g), decreasing = TRUE))
-stddev(degree(g))
-sd(degree(g))
-tail(sort(degree(g), decreasing = TRUE))
-plot(log(g.breaks.clean), log(g.probs.clean))
-# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
-# Degree Distribution
-# Professor: Dr. McKinney, Spring 2022
-# Noah Schrick - 1492657
-library(igraph)
-library(igraphdata)
-data(yeast)
-g <- yeast
-g.netname <- "Yeast"
-################# Set up Work #################
-g.vec <- degree(g)
-g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
-" Network"))
-legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
-"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
-"#006CD1", "#E66100", "#D35FB7"))
-g.mean <- mean(g.vec)
-g.seq <- 0:max(g.vec) # x-axis
-################# Guessing Alpha #################
-alpha.guess <- 1.5
-lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
-################# Poisson #################
-g.pois <- dpois(g.seq, g.mean, log=F)
-lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
-################# Linear model: Least-Squares Fit #################
-g.breaks <- g.hist$breaks[-c(1)] # remove 0
-g.probs <- g.hist$density[-1] # make lengths match
-# Need to clean up probabilities that are 0
-nz.probs.mask <- g.probs!=0
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-plot(log(g.breaks.clean), log(g.probs.clean))
-g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
-summary(g.fit)
-alpha.LM <- coef(g.fit)[2]
-lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
-################# Max-Log-Likelihood #################
-n <- length(g.breaks.clean)
-kmin <- g.breaks.clean[1]
-alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
-alpha.ML
-lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
-plot(log(g.breaks.clean), log(g.probs.clean))
-g.breaks.clean <- g.breaks[nz.probs.mask]
-g.probs.clean <- g.probs[nz.probs.mask]
-plot(log(g.breaks.clean), log(g.probs.clean))
-# load gene expression data
-load("sense.filtered.cpm.Rdata")
-## Set Working Directory to file directory - RStudio approach
-setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
-# load gene expression data
-load("sense.filtered.cpm.Rdata")
-dim(sense.filtered.cpm)
-colnames(sense.filtered.cpm)
-length(sense.filtered.cpm)
-## Set Working Directory to file directory - RStudio approach
-setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
-## 2: Loading Demographic Data
-subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE)
-dim(subject.attrs)  # 160 subjects x 40 attributes
-colnames(subject.attrs)  # interested in X (sample ids) and Diag (diagnosis)
-subject.attrs$X
-subject.attrs$Diag
-dim(sense.filtered.cpm)
-colnames(sense.filtered.cpm)
-rownames(sense.filtered.cpm)
-# Matching gene expression samples with their diagnosis
 if (!require("dplyr")) install.packages("dplyr")
-# Matching gene expression samples with their diagnosis
-if (!require("dplyr")) install.packages("dplyr")
-# create a phenotype vector
-# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
-#   intersect %in% with the RNA-Seq data
-phenos.df <- subject.attrs %>%
-filter(X %in% colnames(sense.filtered.cpm)) %>%
-dplyr::select(X, Diag)
-# Matching gene expression samples with their diagnosis
-if (!require("dplyr")) install.packages("dplyr")
-# create a phenotype vector
-# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
-#   intersect %in% with the RNA-Seq data
-phenos.df <- subject.attrs %>%
-filter(X %in% colnames(sense.filtered.cpm)) %>%
-dplyr::select(X, Diag)
 library(dplyr)
 # create a phenotype vector
 # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
@@ -437,59 +15,17 @@ summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
 # raw cpm boxplots and histogram of one sample
 boxplot(sense.filtered.cpm,range=0,
 ylab="raw probe intensity", main="Raw", names=mddPheno)
+hist(sense.filtered.cpm[,1], freq=F, ylab="density",
+xlab="raw probe intensity", main="Raw Data Density for Sample 1")
 # log2 transformed boxplots and histogram
 boxplot(log2(sense.filtered.cpm), range=0,
 ylab="log2 intensity", main="Log2 Transformed", names=mddPheno)
-hist(sense.filtered.cpm[,1], freq=F, ylab="density",
-xlab="raw probe intensity", main="Raw Data Density for Sample 1")
 hist(log2(sense.filtered.cpm[,1]), freq=F, ylab="density",
 xlab="log2 probe intensity", main="log2 Data Density for Sample 1")
-median(sense.filtered.cpm[,1])
-mode(sense.filtered.cpm[,1])
-average(sense.filtered.cpm[,1])
-mean(sense.filtered.cpm[,1])
 getmode <- function(v) {
 uniqv <-unique(v)
 uniqv[which.max(tabulate(match(v, uniqv)))]
 }
-getmode(sense.filtered.cpm[,1])
-median(sense.filtered.cpm[,1])
-mean(sense.filtered.cpm[,1])
-getmode(sense.filtered.cpm)
-median(sense.filtered.cpm)
-mean(sense.filtered.cpm)
-getmode(log2(sense.filtered.cpm))
-median(log2(sense.filtered.cpm))
-mean(log2(sense.filtered.cpm))
-log2(sense.filtered.cpm)
-log2(sense.filtered.cpm[,1])
-getmode(log2(sense.filtered.cpm[,1]))
-median(log2(sense.filtered.cpm[,1]))
-mean(log2(sense.filtered.cpm[,1]))
-data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
-data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
-mean(log2(sense.filtered.cpm[,1]))),
-Mode = c(getmode(sense.filtered.cpm[,1]),
-getmode(log2(sense.filtered.cpm[,1]))),
-Median = c(median(sense.filtered.cpm[,1])),
-)
-data
-data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
-mean(log2(sense.filtered.cpm[,1]))),
-Mode = c(getmode(sense.filtered.cpm[,1]),
-getmode(log2(sense.filtered.cpm[,1]))),
-Median = c(median(sense.filtered.cpm[,1])),
-)
-data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
-mean(log2(sense.filtered.cpm[,1]))),
-Mode = c(getmode(sense.filtered.cpm[,1]),
-getmode(log2(sense.filtered.cpm[,1]))),
-Median = c(median(sense.filtered.cpm[,1]))
-)
-data
-rownames(data) = c("Original", "Log2 Transformed")
-data
-table(data)
 data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
 mean(log2(sense.filtered.cpm[,1]))),
 Mode = c(getmode(sense.filtered.cpm[,1]),
@@ -510,3 +46,467 @@ library(preprocessCore)    # replace with custom function?
 mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm)
 boxplot(mddExprData_quantile,range=0,ylab="raw intensity", main="Quantile Normalized")
 sapply(mddExprData_quantile, function(x) quantile(x, probs = seq(0, 1, 1/4)))
+quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
+quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))
+length(mddExprData_quantile)
+sapply(mddExprData_quantile, function(x) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)))
+head(mddExprData_quantile)
+head(mddExprData_quantile)
+head(mddExprData_quantile[,3])
+head(mddExprData_quantile[,1-3])
+head(mddExprData_quantile)
+### 3: Log2 on quantile normalized data
+mddExprData_quantileLog2 <- log2(mddExprData_quantile)
+# add phenotype names to matrix
+colnames(mddExprData_quantileLog2) <- mddPheno
+boxplot(mddExprData_quantileLog2,range=0,
+ylab="log2 intensity", main="Quantile Normalized Log2")
+hist(log2(mddExprData_quantileLog2[,1]), freq=F,
+ylab="density", xlab="log2 probe intensity",
+main="log2 Quantile Normalized for Sample 1")
+## 4: Means
+mean(mddExprData_quantileLog2[,1])
+colMeans(mddExprData_quantileLog2)
+## 4: Means
+mean(mddExprData_quantileLog2[,1])
+expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
+colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
+## MDS of subjects
+#d<-dist(expr_SxG)         # Euclidean metric
+mddCorr<-cor(t(expr_SxG))  # distance based on correlation
+d <- sqrt(1-mddCorr)
+mdd.mds <- cmdscale(d, k = 2)
+x <- mdd.mds[,1]
+y <- mdd.mds[,2]
+mdd.mds.df <- data.frame(mdd.mds)
+colnames(mdd.mds.df) <- c("dim1","dim2")
+if (!require("ggplot2"))
+BiocManager::install("ggplot2")
+library(ggplot2)
+p <- ggplot() # initialize empty ggplot object
+p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
+p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
+print(p)
+expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
+colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
+## MDS of subjects
+#d<-dist(expr_SxG)         # Euclidean metric
+mddCorr<-cor(t(expr_SxG))  # distance based on correlation
+d <- sqrt(1-mddCorr)
+mdd.mds <- cmdscale(d, k = 2)
+x <- mdd.mds[,1]
+y <- mdd.mds[,2]
+mdd.mds.df <- data.frame(mdd.mds)
+colnames(mdd.mds.df) <- c("dim1","dim2")
+if (!require("ggplot2"))
+BiocManager::install("ggplot2")
+library(ggplot2)
+p <- ggplot() # initialize empty ggplot object
+p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
+p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
+print(p)
+dim(mdd.mds)
+dim(mdd.mds.df)
+library(umap)
+mddTree = hclust(as.dist(d))
+mddTree$labels <- mddPheno
+plot(mddTree)
+num.clust <- 5
+mddCuts <- cutree(mddTree,k=num.clust)
+table(names(mddCuts),mddCuts)
+if (!require("dendextend")) install.packages("dendextend")
+library(dendextend)
+if (!require("dendextend")) install.packages("dendextend")
+library(dendextend)
+## 4: UMAP
+if (!require("umap")) install.packages("umap")
+library(umap)
+## 2: hierarchical cluster of subjects
+mddTree = hclust(as.dist(d))
+mddTree$labels <- mddPheno
+plot(mddTree)
+## 2: hierarchical cluster of subjects
+mddTree = hclust(as.dist(d))
+mddTree$labels <- mddPheno
+plot(mddTree)
+table(mddTree)
+num.clust <- 5
+mddCuts <- cutree(mddTree,k=num.clust)
+table(names(mddCuts),mddCuts)
+?table
+prop.table(names(mddCuts),mddCuts)
+mddCutstable <- table(names(mddCuts),mddCuts)
+prop.table(mddCutstable)
+prop.table(mddCutstable, margin=1)
+prop.table(mddCutstable, margin=2)
+if (!require("dendextend")) install.packages("dendextend")
+library(dendextend)
+dend <- as.dendrogram(mddTree)
+dend=color_labels(dend, k=num.clust)
+#dend <- dend %>% color_branches(k = 4)
+plot(dend) # selective coloring of branches AND labels
+## 3: Clusters
+if (!require("WGCNA"))
+BiocManager::install("WGCNA")
+library(WGCNA)
+# Plot the dendrogram and colors underneath
+sizeGrWindow(8,6)
+dynamicMods = cutreeDynamic(dendro = mddTree, distM = d,
+deepSplit = 2, pamRespectsDendro = FALSE,
+minClusterSize = 2)
+mddColors = labels2colors(dynamicMods)
+table(mddColors)
+table(mddColors,names(mddCuts))
+plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters",
+dendroLabels = NULL, # hang = -1,
+addGuide = TRUE, #guideHang = 0.05,
+main = "Clustering with WGCNA")
+mddColorstable <- table(mddColors,names(mddCuts))
+prop.table(mddColorstable)
+prop.table(mddColorstable, margin = 1)
+## 4: UMAP
+if (!require("umap")) install.packages("umap")
+library(umap)
+# change umap config parameters
+custom.config = umap.defaults
+custom.config$random_state = 123
+custom.config$n_epochs = 500
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+#colors[dats[,ncol(dats)]==1]="red"
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+colors[dats[,ncol(dats)]==1]="red"
+# colors[dats[,ncol(dats)]==1]="red"
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+# colors[dats[,ncol(dats)]==1]="red"
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("class -1","class +1"),
+col=c("black","red"), pch=19)
+# colors[dats[,ncol(dats)]==1]="red"
+colors[expr_SxG[,ncol(expr_SxG)]==1]="red"
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("class -1","class +1"),
+col=c("black","red"), pch=19)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+# colors[dats[,ncol(dats)]==1]="red"
+colors = rep("red",ncol(expr_SxG))
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG), "red",ncol(expr_SxG))
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+nrow(expr_SxG)
+obs_umap
+obs_umap$data
+obs_umap
+obs_umap$layout
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+# colors[dats[,ncol(dats)]==1]="red"
+colors[obs_umap[,ncol(obs_umap)]>0]="red"
+# colors[dats[,ncol(dats)]==1]="red"
+colors[obs_umap$layout[,nrow(obs_umap$layout)]>0]="red"
+# colors[dats[,ncol(dats)]==1]="red"
+colors[obs_umap[,nrow(obs_umap$layout)]>0]="red"
+obs_umap$layout
+# colors[dats[,ncol(dats)]==1]="red"
+colors[obs_umap$layout[,1]>0]="red"
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+obs_umap
+obs_umap$knn
+obs_umap$layout
+obs_umap$layout[0]
+obs_umap$layout[1]
+obs_umap$layout$name
+obs_umap$layout$names
+obs_umap$layout
+obs_umap$layout$HC
+obs_umap
+obs_umap$config
+obs_umap$data
+obs_umap
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("class -1","class +1"),
+col=c("black","red"), pch=19)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+plot(obs_umap$layout, #col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+rownames(expr_SxG)
+labels2colors(obs_umap)
+labels2colors(as.df(obs_umap))
+labels2colors(as.dataframe(obs_umap))
+rownames(expr_SxG)
+colors = rownames(expr_SxG)
+colors
+sapply(colors, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
+sapply(colors[i], ifelse(grep(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red"))
+colors <- sapply(expr_SxG, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))
+colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")))
+colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
+colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)) "black", "red"))
+colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"))
+colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"})
+colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE)), "black", "red"})
+colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
+?sapply
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), MDD, fixed=TRUE), "black", "red")})
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD"", fixed=TRUE), "black", "red")})
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
+grep(rownames(expr_SxG), "MDD")
+grep(rownames(expr_SxG), "HC")
+grep(rownames(expr_SxG), "HC", fixed=TRUE)
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")})
+rownames(expr_SxG[1])
+rownames(expr_SxG[1][1])
+rownames(expr_SxG[1],[1])
+rownames(expr_SxG[1,1])
+rownames(expr_SxG[2,2])
+rownames(expr_SxG[2])
+rownames(expr_SxG)[1]
+rownames(expr_SxG)[2]
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
+colors
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")})
+colors
+{ifelse(grep(x,
+"MDD", fixed=TRUE),
+"black", "red")})
+# colors[dats[,ncol(dats)]==1]="red"
+colors <- sapply(expr_SxG, function(x)
+{ifelse(
+grep(x, "MDD", fixed=TRUE),
+"black", "red")
+}
+)
+colors
+ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
+)
+colors <- ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red"
+)
+colors
+colors <- ifelse(grepl(expr_SxG, "MDD", fixed=TRUE), "black", "red")
+colors
+# colors[dats[,ncol(dats)]==1]="red"
+colors <- sapply(expr_SxG, function(x)
+{ifelse(
+grepl(x, "MDD", fixed=TRUE),
+"black", "red")
+}
+)
+colors
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+{ifelse(
+grepl(expr_SxG[x], "MDD", fixed=TRUE),
+"black", "red")
+}
+expr_SxG
+{ifelse(
+grepl(rownames(x), "MDD", fixed=TRUE),
+"black", "red")
+}
+{ifelse(
+grepl(rownames(SxG)[x], "MDD", fixed=TRUE),
+"black", "red")
+}
+{ifelse(
+grepl(rownames(expr_SxG)[x], "MDD", fixed=TRUE),
+"black", "red")
+}
+for (i in numrows(expr_SxG)) {
+colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
+}
+for (i in nrow(expr_SxG)) {
+colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
+}
+expr_SxG
+expr_SxG[1]
+for (i in nrow(expr_SxG)) {
+colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
+}
+colors
+colors[i] <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")
+colors
+expr_SxG
+expr_SxG[1][1]
+expr_SxG[1]
+expr_SxG[2]
+expr_SxG[1]
+expr_SxG[1,1]
+expr_SxG[0,1]
+expr_SxG[0,0]
+expr_SxG[1,0]
+expr_SxG[1,2]
+expr_SxG[3,2]$name
+class(expr_SxG[3,2])
+class(expr_SxG)
+expr_SxG[3,2]$label
+expr_SxG[3,2]$labels
+labels(expr_SxG)
+labels(expr_SxG[1])
+labels(expr_SxG[1,1])
+labels(expr_SxG[1,2])
+labels(expr_SxG[3,2])
+labels(expr_SxG[1,])
+labels(expr_SxG[,1])
+colnames(expr_SxG)
+rownames(expr_SxG)
+rownames(expr_SxG[1])
+rownames(expr_SxG[1])[1]
+rownames(expr_SxG[1])[2]
+rownames(expr_SxG[1])[31]
+for (i in nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
+}
+colors
+colors <- list()
+for (i in nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red")
+}
+colors
+rownames(expr_SxG[1])[31]
+grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
+grepl(rownames(expr_SxG)[31], MDD, fixed=TRUE)
+grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE)
+grepl(rownames(expr_SxG)[31], "MDD")
+class(rownames(expr_SxG)[31])
+grepl("MDD", rownames(expr_SxG)[31])
+colors <- list()
+for (i in nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+colors
+grepl("MDD", rownames(expr_SxG)[157])
+rownames(expr_SxG)[157]
+rownames(expr_SxG)[1565]
+rownames(expr_SxG)[156]
+ifelse(grepl("MDD", rownames(expr_SxG)[156]), "black", "red")
+ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
+ifelse(grepl("MDD", rownames(expr_SxG)[1]), "black", "red")
+colors[1]
+colors[2]
+colors[1,1]
+colors[,1]
+colors[1,]
+nrow(expr_SxG)
+ifelse(grepl("MDD", rownames(expr_SxG)[2]), "black", "red")
+ifelse(grepl("MDD", rownames(expr_SxG)[3]), "black", "red")
+ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red")
+for i in nrow(expr_SxG){ print(i) {
+for i in nrow(expr_SxG){ print(i) {
+for (i in nrow(expr_SxG)){ print(i) {
+for (i in nrow(expr_SxG)){ print(i) }
+for (i in 1:nrow(expr_SxG)){ print(i) }
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+colors
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+colors <- list()
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+class(colors)
+dim(colors)
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+colors
+colors[157]
+colors[157][1]
+class(colors)
+?plot
+?plot
+class(sample(letters[1:3]),10)
+class(sample(letters[1:3], 10, replace=TRUE))
+colors
+dim
+?dim
+dim(colors)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+class(colors)
+colors
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+colors
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+# change umap config parameters
+custom.config = umap.defaults
+custom.config$random_state = 123
+custom.config$n_epochs = 500
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("class -1","class +1"),
+col=c("black","red"), pch=19)
+custom.config = umap.defaults
+custom.config$random_state = 123
+custom.config$n_epochs = 500
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+for (i in 1:nrow(expr_SxG)) {
+colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors,
+main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("MDD","HC"),
+col=c("black","red"), pch=19)
+data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
+mean(log2(sense.filtered.cpm[,1]))),
+Mode = c(getmode(sense.filtered.cpm[,1]),
+getmode(log2(sense.filtered.cpm[,1]))),
+Median = c(median(sense.filtered.cpm[,1]),
+median(log2(sense.filtered.cpm[,1])))
+)
+rownames(data) = c("Original", "Log2 Transformed")
+data
diff --git a/.~lock.lab3_expression.docx# b/.~lock.lab3_expression.docx#
deleted file mode 100644
index 782efae..0000000
--- a/.~lock.lab3_expression.docx#
+++ /dev/null
@@ -1 +0,0 @@
-,noah,NovaArchSys,22.09.2022 19:18,file:///home/noah/.config/libreoffice/4;
\ No newline at end of file
diff --git a/Schrick-Noah_CS-6643_Lab-3.R b/Schrick-Noah_CS-6643_Lab-3.R
index 633cf48..08cef5b 100644
--- a/Schrick-Noah_CS-6643_Lab-3.R
+++ b/Schrick-Noah_CS-6643_Lab-3.R
@@ -60,7 +60,8 @@ data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]),
                             mean(log2(sense.filtered.cpm[,1]))),
                    Mode = c(getmode(sense.filtered.cpm[,1]),
                             getmode(log2(sense.filtered.cpm[,1]))),
-                   Median = c(median(sense.filtered.cpm[,1]))
+                   Median = c(median(sense.filtered.cpm[,1]),
+                              median(log2(sense.filtered.cpm[,1])))
                    )
 rownames(data) = c("Original", "Log2 Transformed")
 data
@@ -97,3 +98,92 @@ hist(log2(mddExprData_quantileLog2[,1]), freq=F,
 ## 4: Means
 mean(mddExprData_quantileLog2[,1])
 colMeans(mddExprData_quantileLog2)
+
+
+#### Part C: Exploratory clustering of microarray data
+## 1: MDS
+# transpose data matrix and convert to data frame
+# ggplot wants data frame and subjects as rows
+expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene
+colnames(expr_SxG) <- rownames(sense.filtered.cpm)  # add gene names
+
+# MDS of subjects 
+#d<-dist(expr_SxG)         # Euclidean metric
+mddCorr<-cor(t(expr_SxG))  # distance based on correlation
+d <- sqrt(1-mddCorr)
+mdd.mds <- cmdscale(d, k = 2)
+x <- mdd.mds[,1]
+y <- mdd.mds[,2]
+mdd.mds.df <- data.frame(mdd.mds)
+colnames(mdd.mds.df) <- c("dim1","dim2")
+
+if (!require("ggplot2"))
+  BiocManager::install("ggplot2")
+library(ggplot2)
+
+p <- ggplot() # initialize empty ggplot object
+p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3)
+p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2")
+print(p)
+
+dim(mdd.mds)
+
+## 2: hierarchical cluster of subjects
+mddTree = hclust(as.dist(d))
+mddTree$labels <- mddPheno
+plot(mddTree)
+
+num.clust <- 5
+mddCuts <- cutree(mddTree,k=num.clust)
+mddCutstable <- table(names(mddCuts),mddCuts)
+prop.table(mddCutstable, margin=2)
+
+
+if (!require("dendextend")) install.packages("dendextend")
+library(dendextend)
+dend <- as.dendrogram(mddTree)
+dend=color_labels(dend, k=num.clust)
+#dend <- dend %>% color_branches(k = 4) 
+plot(dend) # selective coloring of branches AND labels 
+
+
+## 3: Clusters
+if (!require("WGCNA"))
+  BiocManager::install("WGCNA")
+library(WGCNA)
+# Plot the dendrogram and colors underneath
+sizeGrWindow(8,6)
+dynamicMods = cutreeDynamic(dendro = mddTree, distM = d,
+                            deepSplit = 2, pamRespectsDendro = FALSE,
+                            minClusterSize = 2)
+mddColors = labels2colors(dynamicMods)
+table(mddColors)
+mddColorstable <- table(mddColors,names(mddCuts))
+prop.table(mddColorstable, margin = 1)
+plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters",
+                    dendroLabels = NULL, # hang = -1,
+                    addGuide = TRUE, #guideHang = 0.05,
+                    main = "Clustering with WGCNA")
+
+## 4: UMAP
+if (!require("umap")) install.packages("umap")
+library(umap)
+# change umap config parameters
+custom.config = umap.defaults
+custom.config$random_state = 123
+custom.config$n_epochs = 500
+
+# umap to cluster observations
+obs_umap = umap(expr_SxG, config=custom.config)
+#add colors for MDD/HC
+colors = rep("black",nrow(expr_SxG))
+
+for (i in 1:nrow(expr_SxG)) {
+  colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red")
+}
+
+dim(obs_umap$layout)
+plot(obs_umap$layout, col=colors, 
+     main="umap of observations", xlab="umap dim1", ylab="umap dim2")
+legend("bottomleft", legend = c("MDD","HC"), 
+       col=c("black","red"), pch=19)
diff --git a/Schrick-Noah_CS-6643_Lab-3.docx b/Schrick-Noah_CS-6643_Lab-3.docx
new file mode 100644
index 0000000..e5a2b5c
Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-3.docx differ
diff --git a/Schrick-Noah_CS-6643_Lab-3.pdf b/Schrick-Noah_CS-6643_Lab-3.pdf
new file mode 100644
index 0000000..c47eb90
Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-3.pdf differ
diff --git a/lab3_expression.docx b/lab3_expression.docx
index 3a4df7d..086814e 100644
Binary files a/lab3_expression.docx and b/lab3_expression.docx differ