diff --git a/Schrick-Noah_CS-6643_Lab-2.R b/Schrick-Noah_CS-6643_Lab-2.R index f0bd440..9942b46 100644 --- a/Schrick-Noah_CS-6643_Lab-2.R +++ b/Schrick-Noah_CS-6643_Lab-2.R @@ -2,3 +2,115 @@ # Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling # Professor: Dr. McKinney, Fall 2022 # Noah L. Schrick - 1492657 + +## Set Working Directory to file directory - RStudio approach +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) + +#### Part A: hclust +## 1 - Writing a 2-column linkage matrix +# No supporting code - see report PDF for work. + +## 2 - Plot the tree based on Part 1 results +# initialize an empty list that will contain fields of our hclust object +a <- list() +# encoding rules: +# negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5) +# positive are merged clusters (defined by row number in $merge) + +# each merged pair in a row +a$merge <- rbind(c(-1, -2), # +1 (A-B) + c(1,-3) , # +2 (A-B)-C + _______ , # +3 (D-E) + _______ , # +4 ((A-B)-C)-(DE) + _______) # +5 (((A-B)-C)-(DE))-F + +a$height <- c(1, 2, 1.5, 3, 4) # merge heights +a$order <- 1:6 # order of leaves +a$labels <- LETTERS[1:6] # labels of leaves as Letters +class(a) <- "hclust" # force a to be hclust object +# plot the tree +# plot knows that a is an hclust object and plots accordingly +plot(a,hang=-1) # or use plot(as.dendrogram(a)) + +#### Part B - UPGMA and WGCNA +## 1 - Read in the distance matrix and plot the dendrogram +my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T) +rownames(my.dist1) = colnames(my.dist1) +my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)] +hc1 <- hclust(as.dist(my.dist1), method="average") +plot(hc1,hang=-1) + +## 2 - Paste the distance matrix in the report. Use header=T + +## 3 - no supporting code + +## 4 - install and run WGCNA +# Install +install.packages("BiocManager") +library(BiocManager) +BiocManager::install("WGCNA") + +# load WGCNA and run on the hc1 dendrogram +cutree(hc1,k=3) # hclust cluster labels with k=3 clusters +library(WGCNA) +dynamicMods = cutreeDynamic( + dendro = hc1, + distM = my.dist1, + deepSplit = 2, + pamRespectsDendro = FALSE, + minClusterSize = 2) +names(dynamicMods) <- colnames(my.dist1) + +## 5 - no supporting code + +## 6 - show clustered data +dynamicColors = labels2colors(dynamicMods) # colors might not work +table(dynamicColors) +# Plot the dendrogram and colors underneath +sizeGrWindow(8,6) +plotDendroAndColors(hc1, + dynamicColors, + "Dynamic Tree Cut", + dendroLabels = NULL, + hang = -1, + addGuide = TRUE, + #guideHang = 0.05, + main = "dynamic cut clustering") + +#### Part C - Distance Matrix +## 1 - no supporting code + +## 2 - no supporting code + +## 3 - Plot the dendrogram + +## 4 - Verify results +# create distance matrix +my.dist2 <- matrix(data=rep(0,16),ncol=4) +my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33; +my.dist2[2,3]<-.50; my.dist2[2,4]<-.28; +my.dist2[3,4]<-.55; +# make matrix symmetric +# make the lower triangle equal to the upper triangle +my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)] +diag(my.dist2)<-999 # big number to guarantee it’s not the min +# compare with hclust +test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA +test$merge +test$height +test$order +test$labels<-LETTERS[1:4] +plot(test,hang=-1) + +#### Part D - MDS +## 1 - Show the 2D MDS +## MDS, you might need to refresh (sweep) the plot window +locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components +x<-locs[,1] +y<-locs[,2] +# pch=NA hides plot symbols: +plot(x,y,main="Multi-dimensional Scaling", + xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1) +# pch specifies plot symbol; we want null because we are using letter next +# plot the text labels instead of symbols: +text(x,y,test$labels,cex=1) \ No newline at end of file