diff --git a/.Rhistory b/.Rhistory index 82ef643..69af0ed 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,426 +1,4 @@ -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -#g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.breaks <- g.hist$breaks # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -alpha.LM -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3) -plot(yeast) -hist(yeast) -hist(g.vec) -g.pois -g.mean -alpha.LM -alpha.ML -degree(g) -sort(degree(g)) -sort(degree(g),decreasing=FALSE) -sort(degree(g),decreasing=F) -sort(degree(g),decreasing=false) -sort(degree(g), decreasing = TRUE) -head(sort(degree(g), decreasing = TRUE)) -stddev(degree(g)) -sd(degree(g)) -tail(sort(degree(g), decreasing = TRUE)) -plot(log(g.breaks.clean), log(g.probs.clean)) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3) -plot(log(g.breaks.clean), log(g.probs.clean)) -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -plot(log(g.breaks.clean), log(g.probs.clean)) -# load gene expression data -load("sense.filtered.cpm.Rdata") -## Set Working Directory to file directory - RStudio approach -setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) -# load gene expression data -load("sense.filtered.cpm.Rdata") -dim(sense.filtered.cpm) -colnames(sense.filtered.cpm) -length(sense.filtered.cpm) -## Set Working Directory to file directory - RStudio approach -setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) -## 2: Loading Demographic Data -subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE) -dim(subject.attrs) # 160 subjects x 40 attributes -colnames(subject.attrs) # interested in X (sample ids) and Diag (diagnosis) -subject.attrs$X -subject.attrs$Diag -dim(sense.filtered.cpm) -colnames(sense.filtered.cpm) -rownames(sense.filtered.cpm) -# Matching gene expression samples with their diagnosis if (!require("dplyr")) install.packages("dplyr") -# Matching gene expression samples with their diagnosis -if (!require("dplyr")) install.packages("dplyr") -# create a phenotype vector -# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that -# intersect %in% with the RNA-Seq data -phenos.df <- subject.attrs %>% -filter(X %in% colnames(sense.filtered.cpm)) %>% -dplyr::select(X, Diag) -# Matching gene expression samples with their diagnosis -if (!require("dplyr")) install.packages("dplyr") -# create a phenotype vector -# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that -# intersect %in% with the RNA-Seq data -phenos.df <- subject.attrs %>% -filter(X %in% colnames(sense.filtered.cpm)) %>% -dplyr::select(X, Diag) library(dplyr) # create a phenotype vector # grab X (subject ids) and Diag (Diagnosis) from subject.attrs that @@ -437,59 +15,17 @@ summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control # raw cpm boxplots and histogram of one sample boxplot(sense.filtered.cpm,range=0, ylab="raw probe intensity", main="Raw", names=mddPheno) +hist(sense.filtered.cpm[,1], freq=F, ylab="density", +xlab="raw probe intensity", main="Raw Data Density for Sample 1") # log2 transformed boxplots and histogram boxplot(log2(sense.filtered.cpm), range=0, ylab="log2 intensity", main="Log2 Transformed", names=mddPheno) -hist(sense.filtered.cpm[,1], freq=F, ylab="density", -xlab="raw probe intensity", main="Raw Data Density for Sample 1") hist(log2(sense.filtered.cpm[,1]), freq=F, ylab="density", xlab="log2 probe intensity", main="log2 Data Density for Sample 1") -median(sense.filtered.cpm[,1]) -mode(sense.filtered.cpm[,1]) -average(sense.filtered.cpm[,1]) -mean(sense.filtered.cpm[,1]) getmode <- function(v) { uniqv <-unique(v) uniqv[which.max(tabulate(match(v, uniqv)))] } -getmode(sense.filtered.cpm[,1]) -median(sense.filtered.cpm[,1]) -mean(sense.filtered.cpm[,1]) -getmode(sense.filtered.cpm) -median(sense.filtered.cpm) -mean(sense.filtered.cpm) -getmode(log2(sense.filtered.cpm)) -median(log2(sense.filtered.cpm)) -mean(log2(sense.filtered.cpm)) -log2(sense.filtered.cpm) -log2(sense.filtered.cpm[,1]) -getmode(log2(sense.filtered.cpm[,1])) -median(log2(sense.filtered.cpm[,1])) -mean(log2(sense.filtered.cpm[,1])) -data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), -data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), -mean(log2(sense.filtered.cpm[,1]))), -Mode = c(getmode(sense.filtered.cpm[,1]), -getmode(log2(sense.filtered.cpm[,1]))), -Median = c(median(sense.filtered.cpm[,1])), -) -data -data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), -mean(log2(sense.filtered.cpm[,1]))), -Mode = c(getmode(sense.filtered.cpm[,1]), -getmode(log2(sense.filtered.cpm[,1]))), -Median = c(median(sense.filtered.cpm[,1])), -) -data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), -mean(log2(sense.filtered.cpm[,1]))), -Mode = c(getmode(sense.filtered.cpm[,1]), -getmode(log2(sense.filtered.cpm[,1]))), -Median = c(median(sense.filtered.cpm[,1])) -) -data -rownames(data) = c("Original", "Log2 Transformed") -data -table(data) data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), mean(log2(sense.filtered.cpm[,1]))), Mode = c(getmode(sense.filtered.cpm[,1]), @@ -510,3 +46,467 @@ library(preprocessCore) # replace with custom function? mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm) boxplot(mddExprData_quantile,range=0,ylab="raw intensity", main="Quantile Normalized") sapply(mddExprData_quantile, function(x) quantile(x, probs = seq(0, 1, 1/4))) +quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))) +quantile(mddExprData_quantile, probs = seq(0, 1, 1/4)) +length(mddExprData_quantile) +sapply(mddExprData_quantile, function(x) quantile(mddExprData_quantile, probs = seq(0, 1, 1/4))) +head(mddExprData_quantile) +head(mddExprData_quantile) +head(mddExprData_quantile[,3]) +head(mddExprData_quantile[,1-3]) +head(mddExprData_quantile) +### 3: Log2 on quantile normalized data +mddExprData_quantileLog2 <- log2(mddExprData_quantile) +# add phenotype names to matrix +colnames(mddExprData_quantileLog2) <- mddPheno +boxplot(mddExprData_quantileLog2,range=0, +ylab="log2 intensity", main="Quantile Normalized Log2") +hist(log2(mddExprData_quantileLog2[,1]), freq=F, +ylab="density", xlab="log2 probe intensity", +main="log2 Quantile Normalized for Sample 1") +## 4: Means +mean(mddExprData_quantileLog2[,1]) +colMeans(mddExprData_quantileLog2) +## 4: Means +mean(mddExprData_quantileLog2[,1]) +expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene +colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names +## MDS of subjects +#d<-dist(expr_SxG) # Euclidean metric +mddCorr<-cor(t(expr_SxG)) # distance based on correlation +d <- sqrt(1-mddCorr) +mdd.mds <- cmdscale(d, k = 2) +x <- mdd.mds[,1] +y <- mdd.mds[,2] +mdd.mds.df <- data.frame(mdd.mds) +colnames(mdd.mds.df) <- c("dim1","dim2") +if (!require("ggplot2")) +BiocManager::install("ggplot2") +library(ggplot2) +p <- ggplot() # initialize empty ggplot object +p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3) +p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2") +print(p) +expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene +colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names +## MDS of subjects +#d<-dist(expr_SxG) # Euclidean metric +mddCorr<-cor(t(expr_SxG)) # distance based on correlation +d <- sqrt(1-mddCorr) +mdd.mds <- cmdscale(d, k = 2) +x <- mdd.mds[,1] +y <- mdd.mds[,2] +mdd.mds.df <- data.frame(mdd.mds) +colnames(mdd.mds.df) <- c("dim1","dim2") +if (!require("ggplot2")) +BiocManager::install("ggplot2") +library(ggplot2) +p <- ggplot() # initialize empty ggplot object +p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3) +p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2") +print(p) +dim(mdd.mds) +dim(mdd.mds.df) +library(umap) +mddTree = hclust(as.dist(d)) +mddTree$labels <- mddPheno +plot(mddTree) +num.clust <- 5 +mddCuts <- cutree(mddTree,k=num.clust) +table(names(mddCuts),mddCuts) +if (!require("dendextend")) install.packages("dendextend") +library(dendextend) +if (!require("dendextend")) install.packages("dendextend") +library(dendextend) +## 4: UMAP +if (!require("umap")) install.packages("umap") +library(umap) +## 2: hierarchical cluster of subjects +mddTree = hclust(as.dist(d)) +mddTree$labels <- mddPheno +plot(mddTree) +## 2: hierarchical cluster of subjects +mddTree = hclust(as.dist(d)) +mddTree$labels <- mddPheno +plot(mddTree) +table(mddTree) +num.clust <- 5 +mddCuts <- cutree(mddTree,k=num.clust) +table(names(mddCuts),mddCuts) +?table +prop.table(names(mddCuts),mddCuts) +mddCutstable <- table(names(mddCuts),mddCuts) +prop.table(mddCutstable) +prop.table(mddCutstable, margin=1) +prop.table(mddCutstable, margin=2) +if (!require("dendextend")) install.packages("dendextend") +library(dendextend) +dend <- as.dendrogram(mddTree) +dend=color_labels(dend, k=num.clust) +#dend <- dend %>% color_branches(k = 4) +plot(dend) # selective coloring of branches AND labels +## 3: Clusters +if (!require("WGCNA")) +BiocManager::install("WGCNA") +library(WGCNA) +# Plot the dendrogram and colors underneath +sizeGrWindow(8,6) +dynamicMods = cutreeDynamic(dendro = mddTree, distM = d, +deepSplit = 2, pamRespectsDendro = FALSE, +minClusterSize = 2) +mddColors = labels2colors(dynamicMods) +table(mddColors) +table(mddColors,names(mddCuts)) +plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters", +dendroLabels = NULL, # hang = -1, +addGuide = TRUE, #guideHang = 0.05, +main = "Clustering with WGCNA") +mddColorstable <- table(mddColors,names(mddCuts)) +prop.table(mddColorstable) +prop.table(mddColorstable, margin = 1) +## 4: UMAP +if (!require("umap")) install.packages("umap") +library(umap) +# change umap config parameters +custom.config = umap.defaults +custom.config$random_state = 123 +custom.config$n_epochs = 500 +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +#colors[dats[,ncol(dats)]==1]="red" +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +colors[dats[,ncol(dats)]==1]="red" +# colors[dats[,ncol(dats)]==1]="red" +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +# colors[dats[,ncol(dats)]==1]="red" +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("class -1","class +1"), +col=c("black","red"), pch=19) +# colors[dats[,ncol(dats)]==1]="red" +colors[expr_SxG[,ncol(expr_SxG)]==1]="red" +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("class -1","class +1"), +col=c("black","red"), pch=19) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +# colors[dats[,ncol(dats)]==1]="red" +colors = rep("red",ncol(expr_SxG)) +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG), "red",ncol(expr_SxG)) +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +nrow(expr_SxG) +obs_umap +obs_umap$data +obs_umap +obs_umap$layout +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +# colors[dats[,ncol(dats)]==1]="red" +colors[obs_umap[,ncol(obs_umap)]>0]="red" +# colors[dats[,ncol(dats)]==1]="red" +colors[obs_umap$layout[,nrow(obs_umap$layout)]>0]="red" +# colors[dats[,ncol(dats)]==1]="red" +colors[obs_umap[,nrow(obs_umap$layout)]>0]="red" +obs_umap$layout +# colors[dats[,ncol(dats)]==1]="red" +colors[obs_umap$layout[,1]>0]="red" +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +obs_umap +obs_umap$knn +obs_umap$layout +obs_umap$layout[0] +obs_umap$layout[1] +obs_umap$layout$name +obs_umap$layout$names +obs_umap$layout +obs_umap$layout$HC +obs_umap +obs_umap$config +obs_umap$data +obs_umap +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("class -1","class +1"), +col=c("black","red"), pch=19) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +plot(obs_umap$layout, #col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +rownames(expr_SxG) +labels2colors(obs_umap) +labels2colors(as.df(obs_umap)) +labels2colors(as.dataframe(obs_umap)) +rownames(expr_SxG) +colors = rownames(expr_SxG) +colors +sapply(colors, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")) +sapply(colors[i], ifelse(grep(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red")) +colors <- sapply(expr_SxG, ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red")) +colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE), "black", "red"))) +colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red")) +colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)) "black", "red")) +colors <- sapply(expr_SxG, (ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red")) +colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(expr_SxG), "MDD", fixed=TRUE)), "black", "red"}) +colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE)), "black", "red"}) +colors <- sapply(expr_SxG, 1, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) +?sapply +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), MDD, fixed=TRUE), "black", "red")}) +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD"", fixed=TRUE), "black", "red")}) +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) +grep(rownames(expr_SxG), "MDD") +grep(rownames(expr_SxG), "HC") +grep(rownames(expr_SxG), "HC", fixed=TRUE) +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(x), "MDD", fixed=TRUE), "black", "red")}) +rownames(expr_SxG[1]) +rownames(expr_SxG[1][1]) +rownames(expr_SxG[1],[1]) +rownames(expr_SxG[1,1]) +rownames(expr_SxG[2,2]) +rownames(expr_SxG[2]) +rownames(expr_SxG)[1] +rownames(expr_SxG)[2] +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")}) +colors +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +colors <- sapply(expr_SxG, function(x) {ifelse(grep(rownames(expr_SxG)[x], "MDD", fixed=TRUE), "black", "red")}) +colors +{ifelse(grep(x, +"MDD", fixed=TRUE), +"black", "red")}) +# colors[dats[,ncol(dats)]==1]="red" +colors <- sapply(expr_SxG, function(x) +{ifelse( +grep(x, "MDD", fixed=TRUE), +"black", "red") +} +) +colors +ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red" +) +colors <- ifelse(grep(expr_SxG, "MDD", fixed=TRUE), "black", "red" +) +colors +colors <- ifelse(grepl(expr_SxG, "MDD", fixed=TRUE), "black", "red") +colors +# colors[dats[,ncol(dats)]==1]="red" +colors <- sapply(expr_SxG, function(x) +{ifelse( +grepl(x, "MDD", fixed=TRUE), +"black", "red") +} +) +colors +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +{ifelse( +grepl(expr_SxG[x], "MDD", fixed=TRUE), +"black", "red") +} +expr_SxG +{ifelse( +grepl(rownames(x), "MDD", fixed=TRUE), +"black", "red") +} +{ifelse( +grepl(rownames(SxG)[x], "MDD", fixed=TRUE), +"black", "red") +} +{ifelse( +grepl(rownames(expr_SxG)[x], "MDD", fixed=TRUE), +"black", "red") +} +for (i in numrows(expr_SxG)) { +colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") +} +for (i in nrow(expr_SxG)) { +colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") +} +expr_SxG +expr_SxG[1] +for (i in nrow(expr_SxG)) { +colors.append() <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") +} +colors +colors[i] <- ifelse(grepl(rownames(expr_SxG[i]), "MDD", fixed=TRUE), "black", "red") +colors +expr_SxG +expr_SxG[1][1] +expr_SxG[1] +expr_SxG[2] +expr_SxG[1] +expr_SxG[1,1] +expr_SxG[0,1] +expr_SxG[0,0] +expr_SxG[1,0] +expr_SxG[1,2] +expr_SxG[3,2]$name +class(expr_SxG[3,2]) +class(expr_SxG) +expr_SxG[3,2]$label +expr_SxG[3,2]$labels +labels(expr_SxG) +labels(expr_SxG[1]) +labels(expr_SxG[1,1]) +labels(expr_SxG[1,2]) +labels(expr_SxG[3,2]) +labels(expr_SxG[1,]) +labels(expr_SxG[,1]) +colnames(expr_SxG) +rownames(expr_SxG) +rownames(expr_SxG[1]) +rownames(expr_SxG[1])[1] +rownames(expr_SxG[1])[2] +rownames(expr_SxG[1])[31] +for (i in nrow(expr_SxG)) { +colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red") +} +colors +colors <- list() +for (i in nrow(expr_SxG)) { +colors[i] <- ifelse(grepl(rownames(expr_SxG)[i], "MDD", fixed=TRUE), "black", "red") +} +colors +rownames(expr_SxG[1])[31] +grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE) +grepl(rownames(expr_SxG)[31], MDD, fixed=TRUE) +grepl(rownames(expr_SxG)[31], "MDD", fixed=TRUE) +grepl(rownames(expr_SxG)[31], "MDD") +class(rownames(expr_SxG)[31]) +grepl("MDD", rownames(expr_SxG)[31]) +colors <- list() +for (i in nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +colors +grepl("MDD", rownames(expr_SxG)[157]) +rownames(expr_SxG)[157] +rownames(expr_SxG)[1565] +rownames(expr_SxG)[156] +ifelse(grepl("MDD", rownames(expr_SxG)[156]), "black", "red") +ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red") +ifelse(grepl("MDD", rownames(expr_SxG)[1]), "black", "red") +colors[1] +colors[2] +colors[1,1] +colors[,1] +colors[1,] +nrow(expr_SxG) +ifelse(grepl("MDD", rownames(expr_SxG)[2]), "black", "red") +ifelse(grepl("MDD", rownames(expr_SxG)[3]), "black", "red") +ifelse(grepl("MDD", rownames(expr_SxG)[155]), "black", "red") +for i in nrow(expr_SxG){ print(i) { +for i in nrow(expr_SxG){ print(i) { +for (i in nrow(expr_SxG)){ print(i) { +for (i in nrow(expr_SxG)){ print(i) } +for (i in 1:nrow(expr_SxG)){ print(i) } +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +colors +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +colors <- list() +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +class(colors) +dim(colors) +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +colors +colors[157] +colors[157][1] +class(colors) +?plot +?plot +class(sample(letters[1:3]),10) +class(sample(letters[1:3], 10, replace=TRUE)) +colors +dim +?dim +dim(colors) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +class(colors) +colors +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +colors +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +# change umap config parameters +custom.config = umap.defaults +custom.config$random_state = 123 +custom.config$n_epochs = 500 +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("class -1","class +1"), +col=c("black","red"), pch=19) +custom.config = umap.defaults +custom.config$random_state = 123 +custom.config$n_epochs = 500 +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) +for (i in 1:nrow(expr_SxG)) { +colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, +main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("MDD","HC"), +col=c("black","red"), pch=19) +data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), +mean(log2(sense.filtered.cpm[,1]))), +Mode = c(getmode(sense.filtered.cpm[,1]), +getmode(log2(sense.filtered.cpm[,1]))), +Median = c(median(sense.filtered.cpm[,1]), +median(log2(sense.filtered.cpm[,1]))) +) +rownames(data) = c("Original", "Log2 Transformed") +data diff --git a/.~lock.lab3_expression.docx# b/.~lock.lab3_expression.docx# deleted file mode 100644 index 782efae..0000000 --- a/.~lock.lab3_expression.docx# +++ /dev/null @@ -1 +0,0 @@ -,noah,NovaArchSys,22.09.2022 19:18,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/Schrick-Noah_CS-6643_Lab-3.R b/Schrick-Noah_CS-6643_Lab-3.R index 633cf48..08cef5b 100644 --- a/Schrick-Noah_CS-6643_Lab-3.R +++ b/Schrick-Noah_CS-6643_Lab-3.R @@ -60,7 +60,8 @@ data <- data.frame(Mean = c(mean(sense.filtered.cpm[,1]), mean(log2(sense.filtered.cpm[,1]))), Mode = c(getmode(sense.filtered.cpm[,1]), getmode(log2(sense.filtered.cpm[,1]))), - Median = c(median(sense.filtered.cpm[,1])) + Median = c(median(sense.filtered.cpm[,1]), + median(log2(sense.filtered.cpm[,1]))) ) rownames(data) = c("Original", "Log2 Transformed") data @@ -97,3 +98,92 @@ hist(log2(mddExprData_quantileLog2[,1]), freq=F, ## 4: Means mean(mddExprData_quantileLog2[,1]) colMeans(mddExprData_quantileLog2) + + +#### Part C: Exploratory clustering of microarray data +## 1: MDS +# transpose data matrix and convert to data frame +# ggplot wants data frame and subjects as rows +expr_SxG <- data.frame(t(mddExprData_quantileLog2)) # Subject x Gene +colnames(expr_SxG) <- rownames(sense.filtered.cpm) # add gene names + +# MDS of subjects +#d<-dist(expr_SxG) # Euclidean metric +mddCorr<-cor(t(expr_SxG)) # distance based on correlation +d <- sqrt(1-mddCorr) +mdd.mds <- cmdscale(d, k = 2) +x <- mdd.mds[,1] +y <- mdd.mds[,2] +mdd.mds.df <- data.frame(mdd.mds) +colnames(mdd.mds.df) <- c("dim1","dim2") + +if (!require("ggplot2")) + BiocManager::install("ggplot2") +library(ggplot2) + +p <- ggplot() # initialize empty ggplot object +p <- p + geom_point(data=mdd.mds.df, aes(x=dim1, y=dim2, color=mddPheno, shape=mddPheno), size=3) +p <- p + ggtitle("MDS") + xlab("Dim 1") + ylab("Dim 2") +print(p) + +dim(mdd.mds) + +## 2: hierarchical cluster of subjects +mddTree = hclust(as.dist(d)) +mddTree$labels <- mddPheno +plot(mddTree) + +num.clust <- 5 +mddCuts <- cutree(mddTree,k=num.clust) +mddCutstable <- table(names(mddCuts),mddCuts) +prop.table(mddCutstable, margin=2) + + +if (!require("dendextend")) install.packages("dendextend") +library(dendextend) +dend <- as.dendrogram(mddTree) +dend=color_labels(dend, k=num.clust) +#dend <- dend %>% color_branches(k = 4) +plot(dend) # selective coloring of branches AND labels + + +## 3: Clusters +if (!require("WGCNA")) + BiocManager::install("WGCNA") +library(WGCNA) +# Plot the dendrogram and colors underneath +sizeGrWindow(8,6) +dynamicMods = cutreeDynamic(dendro = mddTree, distM = d, + deepSplit = 2, pamRespectsDendro = FALSE, + minClusterSize = 2) +mddColors = labels2colors(dynamicMods) +table(mddColors) +mddColorstable <- table(mddColors,names(mddCuts)) +prop.table(mddColorstable, margin = 1) +plotDendroAndColors(mddTree, mddColors, "Dynamic Clusters", + dendroLabels = NULL, # hang = -1, + addGuide = TRUE, #guideHang = 0.05, + main = "Clustering with WGCNA") + +## 4: UMAP +if (!require("umap")) install.packages("umap") +library(umap) +# change umap config parameters +custom.config = umap.defaults +custom.config$random_state = 123 +custom.config$n_epochs = 500 + +# umap to cluster observations +obs_umap = umap(expr_SxG, config=custom.config) +#add colors for MDD/HC +colors = rep("black",nrow(expr_SxG)) + +for (i in 1:nrow(expr_SxG)) { + colors[i] <- ifelse(grepl("MDD", rownames(expr_SxG)[i]), "black", "red") +} + +dim(obs_umap$layout) +plot(obs_umap$layout, col=colors, + main="umap of observations", xlab="umap dim1", ylab="umap dim2") +legend("bottomleft", legend = c("MDD","HC"), + col=c("black","red"), pch=19) diff --git a/Schrick-Noah_CS-6643_Lab-3.docx b/Schrick-Noah_CS-6643_Lab-3.docx new file mode 100644 index 0000000..e5a2b5c Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-3.docx differ diff --git a/Schrick-Noah_CS-6643_Lab-3.pdf b/Schrick-Noah_CS-6643_Lab-3.pdf new file mode 100644 index 0000000..c47eb90 Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-3.pdf differ diff --git a/lab3_expression.docx b/lab3_expression.docx index 3a4df7d..086814e 100644 Binary files a/lab3_expression.docx and b/lab3_expression.docx differ