diff --git a/.~lock.Schrick-Noah_CS-6643_Lab-4.docx# b/.~lock.Schrick-Noah_CS-6643_Lab-4.docx# index dc2814b..20caa52 100644 --- a/.~lock.Schrick-Noah_CS-6643_Lab-4.docx# +++ b/.~lock.Schrick-Noah_CS-6643_Lab-4.docx# @@ -1 +1 @@ -,noah,NovaArchSys,30.09.2022 11:57,file:///home/noah/.config/libreoffice/4; \ No newline at end of file +,noah,NovaArchSys,30.09.2022 12:12,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/Schrick-Noah_CS-6643_Lab-4.R b/Schrick-Noah_CS-6643_Lab-4.R index 5038cf7..51cab6c 100644 --- a/Schrick-Noah_CS-6643_Lab-4.R +++ b/Schrick-Noah_CS-6643_Lab-4.R @@ -81,3 +81,52 @@ p <- ggplot(mygene.data.df, aes(x=phenotype, y=gene, fill=phenotype)) + stat_boxplot(geom ='errorbar') + geom_boxplot() p <- p + xlab("MDD versus HC") + ylab(mygene) p + + +#### Part D: t-test for all filtered genes + +# Put it all together into a function to run in loop. +# First write a function that computes t-test for one gene. +# i is the data row for the gene +ttest_fn <- function(i){ + mygene <- rownames(GxS.covfilter)[i] + t.result <- t.test(GxS.covfilter[i,] ~ pheno.factor) + tstat <- t.result$statistic + pval <- t.result$p.value + # return vector of three things for each gene + c(mygene, tstat, pval) +} + +# Testing on the second gene +ttest_fn(2) + +## Testing on the rest: +# initialize an empty matrix to store the results +ttest_allgene.mat <- matrix(0,nrow=nrow(GxS.covfilter), ncol=3) +# run analysis on all gene rows +for (i in 1:nrow(GxS.covfilter)){ + ttest_allgene.mat[i,] <- ttest_fn(i) +} +# convert matrix to data frame and colnames +ttest_allgene.df <- data.frame(ttest_allgene.mat) +colnames(ttest_allgene.df) <- c("gene ", "t.stat", "p.val") + +# sort based on p-value +ttest_allgene.sorted <- ttest_allgene.df %>% + mutate_at("p.val", as.character) %>% + mutate_at("p.val", as.numeric) %>% + arrange(p.val) # sort +ttest_allgene.sorted[1:10,] # look at top 10 + +## Plot the result of the top gene +# create data frame for gene +myrow <- which(ttest_allgene.df$gene==ttest_allgene.sorted[1,1]) +mygene<-rownames(GxS.covfilter)[myrow] +mytopgene.data.df <- data.frame(mygene=GxS.covfilter[myrow,], + phenotype=pheno.factor) + +# boxplot +p <- ggplot(mytopgene.data.df, aes(x=phenotype, y=mygene, fill=phenotype)) + + stat_boxplot(geom ='errorbar') + geom_boxplot() +p <- p + xlab("MDD versus HC") + ylab(mygene) +p diff --git a/Schrick-Noah_CS-6643_Lab-4.docx b/Schrick-Noah_CS-6643_Lab-4.docx index 0c5c630..78c7611 100644 Binary files a/Schrick-Noah_CS-6643_Lab-4.docx and b/Schrick-Noah_CS-6643_Lab-4.docx differ