# Lab 2 for the University of Tulsa's CS-6643 Bioinformatics Course # Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling # Professor: Dr. McKinney, Fall 2022 # Noah L. Schrick - 1492657 ## Set Working Directory to file directory - RStudio approach setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #### Part A: hclust ## 1 - Writing a 2-column linkage matrix # No supporting code - see report PDF for work. ## 2 - Plot the tree based on Part 1 results # initialize an empty list that will contain fields of our hclust object a <- list() # encoding rules: # negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5) # positive are merged clusters (defined by row number in $merge) # each merged pair in a row a$merge <- rbind(c(-1, -2), # +1 (A-B) c( 1, -3), # +2 (A-B)-C c(-4, -5), # +3 (D-E) c( 2, 3), # +4 ((A-B)-C)-(DE) c( 4, -6)) # +5 (((A-B)-C)-(DE))-F a$height <- c(1, 2, 1.5, 3, 4) # merge heights a$order <- 1:6 # order of leaves a$labels <- LETTERS[1:6] # labels of leaves as Letters class(a) <- "hclust" # force a to be hclust object # plot the tree # plot knows that a is an hclust object and plots accordingly plot(a,hang=-1) # or use plot(as.dendrogram(a)) #### Part B - UPGMA and WGCNA ## 1 - Read in the distance matrix and plot the dendrogram my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T) rownames(my.dist1) = colnames(my.dist1) my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)] hc1 <- hclust(as.dist(my.dist1), method="average") hc1 plot(hc1,hang=-1) ## 2 - Paste the distance matrix in the report. Use header=T ## 3 - no supporting code ## 4 - install and run WGCNA # Install if (!require("BiocManager")) install.packages("BiocManager") library(BiocManager) if (!require("WGCNA")) BiocManager::install("WGCNA") # load WGCNA and run on the hc1 dendrogram cutree(hc1,k=3) # hclust cluster labels with k=3 clusters library(WGCNA) dynamicMods = cutreeDynamic( dendro = hc1, distM = my.dist1, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = 2) names(dynamicMods) <- colnames(my.dist1) ## 5 - identify clusters dynamicMods ## 6 - show clustered data dynamicColors = labels2colors(dynamicMods) # colors might not work table(dynamicColors) # Plot the dendrogram and colors underneath sizeGrWindow(8,6) plotDendroAndColors(hc1, dynamicColors, "Dynamic Tree Cut", dendroLabels = NULL, hang = -1, addGuide = TRUE, #guideHang = 0.05, main = "dynamic cut clustering") #### Part C - Distance Matrix ## 1 - no supporting code ## 2 - no supporting code ## 3 - Plot the dendrogram # initialize an empty list that will contain fields of our hclust object a <- list() # encoding rules: # negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5) # positive are merged clusters (defined by row number in $merge) # each merged pair in a row a$merge <- rbind(c(-1, -2), # +1 (A-B) c(1, -4), # +2 (A-B)-D c(2, -3)) # +3 ((A-B)-D)-C a$height <- c(0.14, 0.305, 0.51) # merge heights a$order <- 1:4 # order of leaves a$labels <- LETTERS[1:4] # labels of leaves as Letters class(a) <- "hclust" # force a to be hclust object # plot the tree # plot knows that a is an hclust object and plots accordingly plot(a) # or use plot(as.dendrogram(a)) ## 4 - Verify results # create distance matrix my.dist2 <- matrix(data=rep(0,16),ncol=4) my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33; my.dist2[2,3]<-.50; my.dist2[2,4]<-.28; my.dist2[3,4]<-.55; # make matrix symmetric # make the lower triangle equal to the upper triangle my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)] diag(my.dist2)<-999 # big number to guarantee it’s not the min # compare with hclust test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA test$merge test$height test$order test$labels<-LETTERS[1:4] plot(test) #### Part D - MDS ## 1 - Show the 2D MDS ## MDS, you might need to refresh (sweep) the plot window locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components x<-locs[,1] y<-locs[,2] # pch=NA hides plot symbols: plot(x,y,main="Multi-dimensional Scaling", xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1) # pch specifies plot symbol; we want null because we are using letter next # plot the text labels instead of symbols: text(x,y,test$labels,cex=1)