Lab 2 init and README
This commit is contained in:
commit
f17c2a1422
141
README.md
Normal file
141
README.md
Normal file
@ -0,0 +1,141 @@
|
||||
# Bioinformatics Lab 2
|
||||
|
||||
## Part A - hclust
|
||||
|
||||
1.) Write a 2-column linkage matrix using the following hclust rules
|
||||
|
||||
- Label the objects A, B, C, D, E, F as negative numbers -1, -2, -3, -4, -5, -6
|
||||
- Put the closest pair of objects in the first row of the matrix. The first row will be: -1, -2
|
||||
- Merge AB (1) with C (-3)
|
||||
- Repeat for the rest of the tree.
|
||||
- Show the full merge matrix and the heights of each row in the merge matrix.
|
||||
|
||||
2.) Create a script with the following <ins>*incomplete*</ins> code. Based on your UPGMA merging calculation and your linkage matrix above, fill in the blanks in the code below to plot the tree. Show the resulting plot.
|
||||
|
||||
# initialize an empty list that will contain fields of our hclust object
|
||||
a <- list()
|
||||
# encoding rules:
|
||||
# negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5)
|
||||
# positive are merged clusters (defined by row number in $merge)
|
||||
|
||||
# each merged pair in a row
|
||||
a$merge <- rbind(c(-1, -2), # +1 (A-B)
|
||||
c(1,-3) , # +2 (A-B)-C
|
||||
_______ , # +3 (D-E)
|
||||
_______ , # +4 ((A-B)-C)-(DE)
|
||||
_______) # +5 (((A-B)-C)-(DE))-F
|
||||
|
||||
a$height <- c(1, 2, 1.5, 3, 4) # merge heights
|
||||
a$order <- 1:6 # order of leaves
|
||||
a$labels <- LETTERS[1:6] # labels of leaves as Letters
|
||||
class(a) <- "hclust" # force a to be hclust object
|
||||
# plot the tree
|
||||
# plot knows that a is an hclust object and plots accordingly
|
||||
plot(a,hang=-1) # or use plot(as.dendrogram(a))
|
||||
|
||||
## Part B - UPGMA and WGCNA
|
||||
|
||||
1) Add the code below to your script to read in the distance matrix distance_matrix.txt and plot the dendrogram. Show the dendrogram.
|
||||
|
||||
# Set the Working Directory First
|
||||
my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T)
|
||||
rownames(my.dist1) = colnames(my.dist1)
|
||||
my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)]
|
||||
hc1 <- hclust(as.dist(my.dist1), method="average")
|
||||
plot(hc1,hang=-1)
|
||||
|
||||
2.) View the distance matrix in Excel and paste it in the report. Use header=T.
|
||||
|
||||
3.) What does header=T do in this situation?
|
||||
|
||||
4.) Add the following code to install and run WGCNA.
|
||||
|
||||
# method 1
|
||||
install.packages("WGCNA", dependencies=T)
|
||||
source("http://bioconductor.org/biocLite.R")
|
||||
biocLite(c("AnnotationDbi", "impute", "GO.db", "preprocessCore"))
|
||||
biocLite("WGCNA")
|
||||
install.packages("WGCNA")
|
||||
|
||||
# method 2 (easier method)
|
||||
install.packages("BiocManager")
|
||||
library(BiocManager)
|
||||
BiocManager::install("WGCNA")
|
||||
|
||||
# load WGCNA and run on the hc1 dendrogram
|
||||
cutree(hc1,k=3) # hclust cluster labels with k=3 clusters
|
||||
library(WGCNA)
|
||||
dynamicMods = cutreeDynamic(
|
||||
dendro = hc1,
|
||||
distM = my.dist1,
|
||||
deepSplit = 2,
|
||||
pamRespectsDendro = FALSE,
|
||||
minClusterSize = 2)
|
||||
names(dynamicMods) <- colnames(my.dist1)
|
||||
|
||||
5.) Looking at dynamicMods, what leaves are clustered together?
|
||||
|
||||
6.) Add the following code to show the clustered data. What do the colors at the bottom represent?
|
||||
|
||||
dynamicColors = labels2colors(dynamicMods) # colors might not work
|
||||
table(dynamicColors)
|
||||
# Plot the dendrogram and colors underneath
|
||||
sizeGrWindow(8,6)
|
||||
plotDendroAndColors(hc1,
|
||||
dynamicColors,
|
||||
"Dynamic Tree Cut",
|
||||
dendroLabels = NULL,
|
||||
hang = -1,
|
||||
addGuide = TRUE,
|
||||
#guideHang = 0.05,
|
||||
main = "dynamic cut clustering")
|
||||
|
||||
## Part C - Distance Matrix
|
||||
|
||||
| <> | A | B | C | D |
|
||||
|:------:|:------:|:------:|:------:|:------:|
|
||||
| A | x | 0.14 | 0.48 | 0.33 |
|
||||
| B | | x | 0.5 | 0.28 |
|
||||
| C | | | x | 0.55 |
|
||||
| D | | | | x |
|
||||
|
||||
|
||||
1.) Do the UPGMA by hand, showing the new UPGMA distance matrices as you merge clusters.
|
||||
|
||||
2.) Create the linkage matrix as you did in A.1, and list the height of each branch. For example, the first row of the linkage matrix is -1, -2 with height .14.
|
||||
|
||||
3.) Mirror the code you used in A.2 to plot the dendrogram for this new matrix.
|
||||
|
||||
4.) Use code below to verify your results by applying hclust to the distance matrix.
|
||||
|
||||
# create distance matrix
|
||||
my.dist2 <- matrix(data=rep(0,16),ncol=4)
|
||||
my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33;
|
||||
my.dist2[2,3]<-.50; my.dist2[2,4]<-.28;
|
||||
my.dist2[3,4]<-.55;
|
||||
# make matrix symmetric
|
||||
# make the lower triangle equal to the upper triangle
|
||||
my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)]
|
||||
diag(my.dist2)<-999 # big number to guarantee it’s not the min
|
||||
# compare with hclust
|
||||
test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA
|
||||
test$merge
|
||||
test$height
|
||||
test$order
|
||||
test$labels<-LETTERS[1:4]
|
||||
plot(test,hang=-1)
|
||||
|
||||
## Part D - Multidimensional Scaling
|
||||
|
||||
1.) Show the 2d MDS plot for my.dist2. How does this visualization change your perspective on how the objects are clustered?
|
||||
|
||||
## MDS, you might need to refresh (sweep) the plot window
|
||||
locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components
|
||||
x<-locs[,1]
|
||||
y<-locs[,2]
|
||||
# pch=NA hides plot symbols:
|
||||
plot(x,y,main="Multi-dimensional Scaling",
|
||||
xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1)
|
||||
# pch specifies plot symbol; we want null because we are using letter next
|
||||
# plot the text labels instead of symbols:
|
||||
text(x,y,test$labels,cex=1)
|
||||
4
Schrick-Noah_CS-6643_Lab-2.R
Normal file
4
Schrick-Noah_CS-6643_Lab-2.R
Normal file
@ -0,0 +1,4 @@
|
||||
# Lab 2 for the University of Tulsa's CS-6643 Bioinformatics Course
|
||||
# Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling
|
||||
# Professor: Dr. McKinney, Fall 2022
|
||||
# Noah L. Schrick - 1492657
|
||||
BIN
Schrick-Noah_CS-6643_Lab-2.docx
Normal file
BIN
Schrick-Noah_CS-6643_Lab-2.docx
Normal file
Binary file not shown.
BIN
assignment.docx
Normal file
BIN
assignment.docx
Normal file
Binary file not shown.
1
distance_matrix.txt
Normal file
1
distance_matrix.txt
Normal file
@ -0,0 +1 @@
|
||||
A B C D E F
999 2 4 6 6 8
999 999 4 6 6 8
999 999 999 6 6 8
999 999 999 999 3 8
999 999 999 999 999 8
999 999 999 999 999 999
|
||||
46
labels2colors.R
Normal file
46
labels2colors.R
Normal file
@ -0,0 +1,46 @@
|
||||
labels2colors <- function (labels, zeroIsGrey = TRUE, colorSeq = NULL, naColor = "grey", commonColorCode = TRUE)
|
||||
{
|
||||
if (is.null(colorSeq))
|
||||
colorSeq = standardColors()
|
||||
if (is.numeric(labels)) {
|
||||
if (zeroIsGrey)
|
||||
minLabel = 0
|
||||
else minLabel = 1
|
||||
if (any(labels < 0, na.rm = TRUE))
|
||||
minLabel = min(c(labels), na.rm = TRUE)
|
||||
nLabels = labels
|
||||
}
|
||||
else {
|
||||
if (commonColorCode) {
|
||||
factors = factor(c(as.matrix(as.data.frame(labels))))
|
||||
nLabels = as.numeric(factors)
|
||||
dim(nLabels) = dim(labels)
|
||||
}
|
||||
else {
|
||||
labels = as.matrix(as.data.frame(labels))
|
||||
factors = list()
|
||||
for (c in 1:ncol(labels)) factors[[c]] = factor(labels[, c])
|
||||
nLabels = sapply(factors, as.numeric)
|
||||
}
|
||||
}
|
||||
if (max(nLabels, na.rm = TRUE) > length(colorSeq)) {
|
||||
nRepeats = as.integer((max(labels) - 1)/length(colorSeq)) +
|
||||
1
|
||||
warning(paste("labels2colors: Number of labels exceeds number of avilable colors.",
|
||||
"Some colors will be repeated", nRepeats, "times."))
|
||||
extColorSeq = colorSeq
|
||||
for (rep in 1:nRepeats) extColorSeq = c(extColorSeq, paste(colorSeq, ".", rep, sep = ""))
|
||||
}
|
||||
else {
|
||||
nRepeats = 1
|
||||
extColorSeq = colorSeq
|
||||
}
|
||||
colors = rep("grey", length(nLabels))
|
||||
fin = !is.na(nLabels)
|
||||
colors[!fin] = naColor
|
||||
finLabels = nLabels[fin]
|
||||
colors[fin][finLabels != 0] = extColorSeq[finLabels[finLabels != 0]]
|
||||
if (!is.null(dim(labels)))
|
||||
dim(colors) = dim(labels)
|
||||
colors
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user