Adding in skeleton R code and given code snippets

This commit is contained in:
Noah L. Schrick 2022-09-14 22:09:03 -05:00
parent f17c2a1422
commit ba1cdbc2a2

View File

@ -2,3 +2,115 @@
# Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling
# Professor: Dr. McKinney, Fall 2022
# Noah L. Schrick - 1492657
## Set Working Directory to file directory - RStudio approach
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#### Part A: hclust
## 1 - Writing a 2-column linkage matrix
# No supporting code - see report PDF for work.
## 2 - Plot the tree based on Part 1 results
# initialize an empty list that will contain fields of our hclust object
a <- list()
# encoding rules:
# negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5)
# positive are merged clusters (defined by row number in $merge)
# each merged pair in a row
a$merge <- rbind(c(-1, -2), # +1 (A-B)
c(1,-3) , # +2 (A-B)-C
_______ , # +3 (D-E)
_______ , # +4 ((A-B)-C)-(DE)
_______) # +5 (((A-B)-C)-(DE))-F
a$height <- c(1, 2, 1.5, 3, 4) # merge heights
a$order <- 1:6 # order of leaves
a$labels <- LETTERS[1:6] # labels of leaves as Letters
class(a) <- "hclust" # force a to be hclust object
# plot the tree
# plot knows that a is an hclust object and plots accordingly
plot(a,hang=-1) # or use plot(as.dendrogram(a))
#### Part B - UPGMA and WGCNA
## 1 - Read in the distance matrix and plot the dendrogram
my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T)
rownames(my.dist1) = colnames(my.dist1)
my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)]
hc1 <- hclust(as.dist(my.dist1), method="average")
plot(hc1,hang=-1)
## 2 - Paste the distance matrix in the report. Use header=T
## 3 - no supporting code
## 4 - install and run WGCNA
# Install
install.packages("BiocManager")
library(BiocManager)
BiocManager::install("WGCNA")
# load WGCNA and run on the hc1 dendrogram
cutree(hc1,k=3) # hclust cluster labels with k=3 clusters
library(WGCNA)
dynamicMods = cutreeDynamic(
dendro = hc1,
distM = my.dist1,
deepSplit = 2,
pamRespectsDendro = FALSE,
minClusterSize = 2)
names(dynamicMods) <- colnames(my.dist1)
## 5 - no supporting code
## 6 - show clustered data
dynamicColors = labels2colors(dynamicMods) # colors might not work
table(dynamicColors)
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(hc1,
dynamicColors,
"Dynamic Tree Cut",
dendroLabels = NULL,
hang = -1,
addGuide = TRUE,
#guideHang = 0.05,
main = "dynamic cut clustering")
#### Part C - Distance Matrix
## 1 - no supporting code
## 2 - no supporting code
## 3 - Plot the dendrogram
## 4 - Verify results
# create distance matrix
my.dist2 <- matrix(data=rep(0,16),ncol=4)
my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33;
my.dist2[2,3]<-.50; my.dist2[2,4]<-.28;
my.dist2[3,4]<-.55;
# make matrix symmetric
# make the lower triangle equal to the upper triangle
my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)]
diag(my.dist2)<-999 # big number to guarantee its not the min
# compare with hclust
test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA
test$merge
test$height
test$order
test$labels<-LETTERS[1:4]
plot(test,hang=-1)
#### Part D - MDS
## 1 - Show the 2D MDS
## MDS, you might need to refresh (sweep) the plot window
locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components
x<-locs[,1]
y<-locs[,2]
# pch=NA hides plot symbols:
plot(x,y,main="Multi-dimensional Scaling",
xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1)
# pch specifies plot symbol; we want null because we are using letter next
# plot the text labels instead of symbols:
text(x,y,test$labels,cex=1)