Adding in skeleton R code and given code snippets
This commit is contained in:
parent
f17c2a1422
commit
ba1cdbc2a2
@ -2,3 +2,115 @@
|
||||
# Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling
|
||||
# Professor: Dr. McKinney, Fall 2022
|
||||
# Noah L. Schrick - 1492657
|
||||
|
||||
## Set Working Directory to file directory - RStudio approach
|
||||
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
||||
|
||||
#### Part A: hclust
|
||||
## 1 - Writing a 2-column linkage matrix
|
||||
# No supporting code - see report PDF for work.
|
||||
|
||||
## 2 - Plot the tree based on Part 1 results
|
||||
# initialize an empty list that will contain fields of our hclust object
|
||||
a <- list()
|
||||
# encoding rules:
|
||||
# negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5)
|
||||
# positive are merged clusters (defined by row number in $merge)
|
||||
|
||||
# each merged pair in a row
|
||||
a$merge <- rbind(c(-1, -2), # +1 (A-B)
|
||||
c(1,-3) , # +2 (A-B)-C
|
||||
_______ , # +3 (D-E)
|
||||
_______ , # +4 ((A-B)-C)-(DE)
|
||||
_______) # +5 (((A-B)-C)-(DE))-F
|
||||
|
||||
a$height <- c(1, 2, 1.5, 3, 4) # merge heights
|
||||
a$order <- 1:6 # order of leaves
|
||||
a$labels <- LETTERS[1:6] # labels of leaves as Letters
|
||||
class(a) <- "hclust" # force a to be hclust object
|
||||
# plot the tree
|
||||
# plot knows that a is an hclust object and plots accordingly
|
||||
plot(a,hang=-1) # or use plot(as.dendrogram(a))
|
||||
|
||||
#### Part B - UPGMA and WGCNA
|
||||
## 1 - Read in the distance matrix and plot the dendrogram
|
||||
my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T)
|
||||
rownames(my.dist1) = colnames(my.dist1)
|
||||
my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)]
|
||||
hc1 <- hclust(as.dist(my.dist1), method="average")
|
||||
plot(hc1,hang=-1)
|
||||
|
||||
## 2 - Paste the distance matrix in the report. Use header=T
|
||||
|
||||
## 3 - no supporting code
|
||||
|
||||
## 4 - install and run WGCNA
|
||||
# Install
|
||||
install.packages("BiocManager")
|
||||
library(BiocManager)
|
||||
BiocManager::install("WGCNA")
|
||||
|
||||
# load WGCNA and run on the hc1 dendrogram
|
||||
cutree(hc1,k=3) # hclust cluster labels with k=3 clusters
|
||||
library(WGCNA)
|
||||
dynamicMods = cutreeDynamic(
|
||||
dendro = hc1,
|
||||
distM = my.dist1,
|
||||
deepSplit = 2,
|
||||
pamRespectsDendro = FALSE,
|
||||
minClusterSize = 2)
|
||||
names(dynamicMods) <- colnames(my.dist1)
|
||||
|
||||
## 5 - no supporting code
|
||||
|
||||
## 6 - show clustered data
|
||||
dynamicColors = labels2colors(dynamicMods) # colors might not work
|
||||
table(dynamicColors)
|
||||
# Plot the dendrogram and colors underneath
|
||||
sizeGrWindow(8,6)
|
||||
plotDendroAndColors(hc1,
|
||||
dynamicColors,
|
||||
"Dynamic Tree Cut",
|
||||
dendroLabels = NULL,
|
||||
hang = -1,
|
||||
addGuide = TRUE,
|
||||
#guideHang = 0.05,
|
||||
main = "dynamic cut clustering")
|
||||
|
||||
#### Part C - Distance Matrix
|
||||
## 1 - no supporting code
|
||||
|
||||
## 2 - no supporting code
|
||||
|
||||
## 3 - Plot the dendrogram
|
||||
|
||||
## 4 - Verify results
|
||||
# create distance matrix
|
||||
my.dist2 <- matrix(data=rep(0,16),ncol=4)
|
||||
my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33;
|
||||
my.dist2[2,3]<-.50; my.dist2[2,4]<-.28;
|
||||
my.dist2[3,4]<-.55;
|
||||
# make matrix symmetric
|
||||
# make the lower triangle equal to the upper triangle
|
||||
my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)]
|
||||
diag(my.dist2)<-999 # big number to guarantee it’s not the min
|
||||
# compare with hclust
|
||||
test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA
|
||||
test$merge
|
||||
test$height
|
||||
test$order
|
||||
test$labels<-LETTERS[1:4]
|
||||
plot(test,hang=-1)
|
||||
|
||||
#### Part D - MDS
|
||||
## 1 - Show the 2D MDS
|
||||
## MDS, you might need to refresh (sweep) the plot window
|
||||
locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components
|
||||
x<-locs[,1]
|
||||
y<-locs[,2]
|
||||
# pch=NA hides plot symbols:
|
||||
plot(x,y,main="Multi-dimensional Scaling",
|
||||
xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1)
|
||||
# pch specifies plot symbol; we want null because we are using letter next
|
||||
# plot the text labels instead of symbols:
|
||||
text(x,y,test$labels,cex=1)
|
||||
Loading…
x
Reference in New Issue
Block a user