diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..82ef643
--- /dev/null
+++ b/.Rhistory
@@ -0,0 +1,512 @@
+# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set-up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE,
+               main=paste("Histogram of the", g.netname, "Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
+       lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=FALSE)
+lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-1] # drop the leading break at 0 so log() is defined
+g.probs <- g.hist$density     # one density per bin; lengths now match
+# Drop bins with zero probability before taking logs
+nz.probs.mask <- g.probs != 0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- -coef(g.fit)[2] # slope of the log-log fit is -alpha, so negate it
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
+################# Max-Log-Likelihood #################
+# Power-law MLE (Newman): alpha-hat = 1 + n / sum(ln(k_i / kmin)),
+# applied here to the non-zero histogram bin breaks rather than the raw degrees
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
+# Exploratory checks
+plot(yeast)
+hist(g.vec)
+g.pois
+g.mean
+alpha.LM
+alpha.ML
+head(sort(degree(g), decreasing = TRUE))
+tail(sort(degree(g), decreasing = TRUE))
+sd(degree(g))
+plot(log(g.breaks.clean), log(g.probs.clean))
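As a cross-check on the guessed, least-squares, and maximum-likelihood exponents above, igraph ships a power-law fitter that works on the raw degree sequence rather than on histogram bins. A minimal sketch, assuming the installed igraph exposes fit_power_law() (older releases name it power.law.fit()):

# Clauset-Shalizi-Newman power-law fit on the raw degrees
pl.fit <- fit_power_law(degree(g), implementation = "plfit")
pl.fit$alpha   # compare against alpha.guess, alpha.LM, and alpha.ML
pl.fit$xmin    # k_min chosen automatically by the plfit implementation
pl.fit$KS.p    # Kolmogorov-Smirnov p-value for goodness of fit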
+# CS-6643 Lab 3: gene expression preprocessing (see Schrick-Noah_CS-6643_Lab-3.R)
+## Set working directory to the file directory - RStudio approach
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+# load gene expression data
+load("sense.filtered.cpm.Rdata")
+dim(sense.filtered.cpm)
+colnames(sense.filtered.cpm)
+rownames(sense.filtered.cpm)
+## 2: Loading Demographic Data
+subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE)
+dim(subject.attrs)      # 160 subjects x 40 attributes
+colnames(subject.attrs) # interested in X (sample ids) and Diag (diagnosis)
+subject.attrs$X
+subject.attrs$Diag
+# Matching gene expression samples with their diagnosis
+if (!require("dplyr")) install.packages("dplyr")
+library(dplyr)
+# create a phenotype vector:
+# grab X (subject ids) and Diag (diagnosis) from subject.attrs that
+# intersect (%in%) with the RNA-Seq samples
+phenos.df <- subject.attrs %>%
+  filter(X %in% colnames(sense.filtered.cpm)) %>%
+  dplyr::select(X, Diag)
+colnames(phenos.df) # $Diag is the MDD diagnosis
+# grab the Diag column and convert character to factor
+mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector
+summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
+#### Part B: Normalization
+## 1: log2 transformation
+# raw cpm boxplots and histogram of one sample
+boxplot(sense.filtered.cpm, range=0,
+        ylab="raw probe intensity", main="Raw", names=mddPheno)
+hist(sense.filtered.cpm[,1], freq=FALSE, ylab="density",
+     xlab="raw probe intensity", main="Raw Data Density for Sample 1")
+# log2 transformed boxplots and histogram
+boxplot(log2(sense.filtered.cpm), range=0,
+        ylab="log2 intensity", main="Log2 Transformed", names=mddPheno)
+hist(log2(sense.filtered.cpm[,1]), freq=FALSE, ylab="density",
+     xlab="log2 probe intensity", main="log2 Data Density for Sample 1")
+# statistical mode: most frequent value in v
+getmode <- function(v) {
+  uniqv <- unique(v)
+  uniqv[which.max(tabulate(match(v, uniqv)))]
+}
+# summary statistics for sample 1, before and after the log2 transform
+data <- data.frame(Mean   = c(mean(sense.filtered.cpm[,1]),
+                              mean(log2(sense.filtered.cpm[,1]))),
+                   Mode   = c(getmode(sense.filtered.cpm[,1]),
+                              getmode(log2(sense.filtered.cpm[,1]))),
+                   Median = c(median(sense.filtered.cpm[,1]),
+                              median(log2(sense.filtered.cpm[,1]))))
+rownames(data) <- c("Original", "Log2 Transformed")
+data
+## 2: Quantile Normalization
+# install preprocessCore from Bioconductor for normalize.quantiles()
+if (!require("BiocManager")) install.packages("BiocManager")
+library(BiocManager)
+if (!require("preprocessCore")) BiocManager::install("preprocessCore")
+library(preprocessCore) # replace with custom function?
+# apply quantile normalization
+mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm)
+boxplot(mddExprData_quantile, range=0,
+        ylab="raw intensity", main="Quantile Normalized")
+# check that the column quantiles now agree across samples
+apply(mddExprData_quantile, 2, quantile, probs = seq(0, 1, 0.25))
diff --git a/Schrick-Noah_CS-6643_Lab-3.R b/Schrick-Noah_CS-6643_Lab-3.R
index db8d9f7..633cf48 100644
--- a/Schrick-Noah_CS-6643_Lab-3.R
+++ b/Schrick-Noah_CS-6643_Lab-3.R
@@ -35,3 +35,65 @@
 mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector
 summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
+#### Part B: Normalization
+## 1: log2 transformation
+# raw cpm boxplots and histogram of one sample
+boxplot(sense.filtered.cpm, range=0,
+        ylab="raw probe intensity", main="Raw", names=mddPheno)
+
+hist(sense.filtered.cpm[,1], freq=FALSE, ylab="density",
+     xlab="raw probe intensity", main="Raw Data Density for Sample 1")
+
+# log2 transformed boxplots and histogram
+boxplot(log2(sense.filtered.cpm), range=0,
+        ylab="log2 intensity", main="Log2 Transformed", names=mddPheno)
+
+hist(log2(sense.filtered.cpm[,1]), freq=FALSE, ylab="density",
+     xlab="log2 probe intensity", main="log2 Data Density for Sample 1")
+
+# statistical mode: most frequent value in v
+getmode <- function(v) {
+  uniqv <- unique(v)
+  uniqv[which.max(tabulate(match(v, uniqv)))]
+}
+
+# summary statistics for sample 1, before and after the log2 transform
+data <- data.frame(Mean   = c(mean(sense.filtered.cpm[,1]),
+                              mean(log2(sense.filtered.cpm[,1]))),
+                   Mode   = c(getmode(sense.filtered.cpm[,1]),
+                              getmode(log2(sense.filtered.cpm[,1]))),
+                   Median = c(median(sense.filtered.cpm[,1]),
+                              median(log2(sense.filtered.cpm[,1]))))
+rownames(data) <- c("Original", "Log2 Transformed")
+data
+
+## 2: Quantile Normalization
+# install preprocessCore from Bioconductor for normalize.quantiles()
+if (!require("BiocManager")) install.packages("BiocManager")
+library(BiocManager)
+if (!require("preprocessCore"))
+  BiocManager::install("preprocessCore")
+library(preprocessCore) # replace with custom function?
+
+# apply quantile normalization
+mddExprData_quantile <- normalize.quantiles(sense.filtered.cpm)
+
+boxplot(mddExprData_quantile, range=0,
+        ylab="raw intensity", main="Quantile Normalized")
+
+head(mddExprData_quantile)
+
+## 3: Log2 on quantile normalized data
+mddExprData_quantileLog2 <- log2(mddExprData_quantile)
+# add phenotype names to matrix
+colnames(mddExprData_quantileLog2) <- mddPheno
+
+boxplot(mddExprData_quantileLog2, range=0,
+        ylab="log2 intensity", main="Quantile Normalized Log2")
+
+# note: the data are already log2-scaled here, so plot them directly
+hist(mddExprData_quantileLog2[,1], freq=FALSE,
+     ylab="density", xlab="log2 probe intensity",
+     main="log2 Quantile Normalized for Sample 1")
+
+## 4: Means
+mean(mddExprData_quantileLog2[,1])
+colMeans(mddExprData_quantileLog2)
diff --git a/lab3_expression.docx b/lab3_expression.docx
index 6f4bf6d..3a4df7d 100644
Binary files a/lab3_expression.docx and b/lab3_expression.docx differ
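Both the history and the lab script tag library(preprocessCore) with "# replace with custom function?". A minimal sketch of what that replacement could look like in base R; the name quantile_normalize and the simple tie handling (ties.method = "min") are assumptions, and preprocessCore::normalize.quantiles() remains the reference implementation:

# Quantile normalization: force every column (sample) to share the same
# empirical distribution, namely the mean of the column-wise sorted values.
quantile_normalize <- function(mat) {
  sorted <- apply(mat, 2, sort)        # sort each sample independently
  rank.means <- rowMeans(sorted)       # target value for each rank
  out <- apply(mat, 2, function(col) rank.means[rank(col, ties.method = "min")])
  dimnames(out) <- dimnames(mat)
  out
}

# Usage, assuming sense.filtered.cpm is loaded as above; results should track
# normalize.quantiles() closely, differing only in how ties are averaged.
mddExprData_quantile_manual <- quantile_normalize(sense.filtered.cpm)
apply(mddExprData_quantile_manual, 2, quantile, probs = seq(0, 1, 0.25))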