commit f17c2a1422192efbd3aa16ddb35045a4a27a707f
Author: noah <noah@theschricks.com>
Date:   Thu Sep 8 14:58:43 2022 -0500

    Lab 2 init and README

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c65e941
--- /dev/null
+++ b/README.md
@@ -0,0 +1,141 @@
+# Bioinformatics Lab 2
+
+## Part A - hclust
+
+1.) Write a 2-column linkage matrix using the following hclust rules
+
+- Label the objects A, B, C, D, E, F as negative numbers -1, -2, -3, -4, -5, -6
+- Put the closest pair of objects in the first row of the matrix. The first row will be: -1, -2
+- Merge AB (1) with C (-3)
+- Repeat for the rest of the tree.
+- Show the full merge matrix and the heights of each row in the merge matrix.
+
+2.) Create a script with the following <ins>*incomplete*</ins> code. Based on your UPGMA merging calculation and your linkage matrix above, fill in the blanks in the code below to plot the tree.  Show the resulting plot. 
+
+	# initialize an empty list that will contain fields of our hclust object
+	a <- list()  
+	#    encoding rules: 
+	#    negative numbers are leaves (A,B,...,E) -> (-1,-2,...,-5) 
+	#    positive are merged clusters (defined by row number in $merge)
+
+	# each merged pair in a row  
+	a$merge <- rbind(c(-1, -2),  # +1 (A-B)
+					c(1,-3)  ,  # +2 (A-B)-C  
+					_______  ,  # +3 (D-E) 
+					_______  ,  # +4 ((A-B)-C)-(DE)
+					_______)    # +5 (((A-B)-C)-(DE))-F
+
+	a$height <- c(1, 2, 1.5, 3, 4) # merge heights
+	a$order <- 1:6               # order of leaves
+	a$labels <- LETTERS[1:6]     # labels of leaves as Letters
+	class(a) <- "hclust"         # force a to be hclust object
+	# plot the tree
+	# plot knows that a is an hclust object and plots accordingly 
+	plot(a,hang=-1) # or use plot(as.dendrogram(a))
+
+## Part B - UPGMA and WGCNA
+
+1) Add the code below to your script to read in the distance matrix distance_matrix.txt and plot the dendrogram.  Show the dendrogram.
+
+	   # Set the Working Directory First
+	   my.dist1 <- read.table("distance_matrix.txt",sep="\t", header=T)
+	   rownames(my.dist1) = colnames(my.dist1)
+	   my.dist1[lower.tri(my.dist1)] = t(my.dist1)[lower.tri(my.dist1)]
+	   hc1 <- hclust(as.dist(my.dist1), method="average")
+	   plot(hc1,hang=-1)
+
+2.) View the distance matrix in Excel and paste it in the report. Use header=T.
+
+3.) What does header=T do in this situation?
+
+4.) Add the following code to install and run WGCNA.
+
+	# method 1
+	install.packages("WGCNA", dependencies=T)
+	source("http://bioconductor.org/biocLite.R")
+	biocLite(c("AnnotationDbi", "impute", "GO.db", "preprocessCore")) 
+	biocLite("WGCNA")  
+	install.packages("WGCNA")
+
+    # method 2 (easier method)
+    install.packages("BiocManager")
+    library(BiocManager)
+	BiocManager::install("WGCNA")
+
+	# load WGCNA and run on the hc1 dendrogram
+	cutree(hc1,k=3)  # hclust cluster labels with k=3 clusters
+	library(WGCNA)
+	dynamicMods = cutreeDynamic(
+			dendro = hc1, 
+			distM =	my.dist1,
+			deepSplit = 2, 
+			pamRespectsDendro = FALSE,
+			minClusterSize = 2)
+	names(dynamicMods) <- colnames(my.dist1)
+
+5.) Looking at dynamicMods, what leaves are clustered together?
+
+6.) Add the following code to show the clustered data. What do the colors at the bottom represent?
+
+	dynamicColors = labels2colors(dynamicMods) # colors might not work
+	table(dynamicColors)
+	# Plot the dendrogram and colors underneath
+	sizeGrWindow(8,6)
+	plotDendroAndColors(hc1, 
+						dynamicColors,
+						"Dynamic Tree Cut",
+						dendroLabels = NULL,
+						hang = -1,
+						addGuide = TRUE, 
+						#guideHang = 0.05,
+						main = "dynamic cut clustering")
+
+## Part C - Distance Matrix
+
+|   <>	 |    A   |   B    |   C    |   D    |
+|:------:|:------:|:------:|:------:|:------:|
+| A 	 |    x	  |  0.14  |   0.48 |  0.33  |
+| B 	 |   	  |   x    |   0.5  |  0.28  |
+| C 	 |   	  |        |    x   |  0.55  |
+| D 	 |   	  |        |        |    x   |
+
+
+1.) Do the UPGMA by hand, showing the new UPGMA distance matrices as you merge clusters.
+
+2.) Create the linkage matrix as you did in A.1, and list the height of each branch. For example, the first row of the linkage matrix is -1, -2 with height .14.  
+
+3.) Mirror the code you used in A.2 to plot the dendrogram for this new matrix. 
+
+4.) Use code below to verify your results by applying hclust to the distance matrix.  
+
+	# create distance matrix
+	my.dist2 <- matrix(data=rep(0,16),ncol=4)
+	my.dist2[1,2]<-.14; my.dist2[1,3]<-.48; my.dist2[1,4]<-.33;
+						my.dist2[2,3]<-.50; my.dist2[2,4]<-.28;
+											my.dist2[3,4]<-.55;
+	# make matrix symmetric
+	# make the lower triangle equal to the upper triangle 
+	my.dist2[lower.tri(my.dist2)] = t(my.dist2)[lower.tri(my.dist2)]
+	diag(my.dist2)<-999 # big number to guarantee it’s not the min 
+	# compare with hclust
+	test <- hclust(as.dist(my.dist2), method="average") # average linkage, UPGMA
+	test$merge
+	test$height
+	test$order
+	test$labels<-LETTERS[1:4]
+	plot(test,hang=-1)
+
+## Part D - Multidimensional Scaling
+
+1.) Show the 2d MDS plot for my.dist2. How does this visualization change your perspective on how the objects are clustered?
+
+	## MDS, you might need to refresh (sweep) the plot window
+	locs<-cmdscale(as.dist(my.dist2), k=2) # k is number of mds components
+	x<-locs[,1]
+	y<-locs[,2]
+	# pch=NA hides plot symbols:
+	plot(x,y,main="Multi-dimensional Scaling", 
+			xlab="MDS dimension-1", ylab="MDS dimension-2", pch=NA,asp=1)
+	# pch specifies plot symbol; we want null because we are using letter next
+	# plot the text labels instead of symbols:
+	text(x,y,test$labels,cex=1)
diff --git a/Schrick-Noah_CS-6643_Lab-2.R b/Schrick-Noah_CS-6643_Lab-2.R
new file mode 100644
index 0000000..f0bd440
--- /dev/null
+++ b/Schrick-Noah_CS-6643_Lab-2.R
@@ -0,0 +1,4 @@
+# Lab 2 for the University of Tulsa's CS-6643 Bioinformatics Course
+# Hierarchical clustering (upgma and linkage matrix), WGCNA, Multidimensional scaling
+# Professor: Dr. McKinney, Fall 2022
+# Noah L. Schrick - 1492657
diff --git a/Schrick-Noah_CS-6643_Lab-2.docx b/Schrick-Noah_CS-6643_Lab-2.docx
new file mode 100644
index 0000000..14ed45c
Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-2.docx differ
diff --git a/assignment.docx b/assignment.docx
new file mode 100644
index 0000000..14ed45c
Binary files /dev/null and b/assignment.docx differ
diff --git a/distance_matrix.txt b/distance_matrix.txt
new file mode 100644
index 0000000..39dece8
--- /dev/null
+++ b/distance_matrix.txt
@@ -0,0 +1 @@
+A	B	C	D	E	F999	2	4	6	6	8999	999	4	6	6	8999	999	999	6	6	8999	999	999	999	3	8999	999	999	999	999	8999	999	999	999	999	999
\ No newline at end of file
diff --git a/labels2colors.R b/labels2colors.R
new file mode 100644
index 0000000..43e5de1
--- /dev/null
+++ b/labels2colors.R
@@ -0,0 +1,46 @@
+labels2colors <- function (labels, zeroIsGrey = TRUE, colorSeq = NULL, naColor = "grey", commonColorCode = TRUE) 
+{
+  if (is.null(colorSeq)) 
+    colorSeq = standardColors()
+  if (is.numeric(labels)) {
+    if (zeroIsGrey) 
+      minLabel = 0
+    else minLabel = 1
+    if (any(labels < 0, na.rm = TRUE)) 
+      minLabel = min(c(labels), na.rm = TRUE)
+    nLabels = labels
+  }
+  else {
+    if (commonColorCode) {
+      factors = factor(c(as.matrix(as.data.frame(labels))))
+      nLabels = as.numeric(factors)
+      dim(nLabels) = dim(labels)
+    }
+    else {
+      labels = as.matrix(as.data.frame(labels))
+      factors = list()
+      for (c in 1:ncol(labels)) factors[[c]] = factor(labels[, c])
+      nLabels = sapply(factors, as.numeric)
+    }
+  }
+  if (max(nLabels, na.rm = TRUE) > length(colorSeq)) {
+    nRepeats = as.integer((max(labels) - 1)/length(colorSeq)) + 
+      1
+    warning(paste("labels2colors: Number of labels exceeds number of avilable colors.", 
+                  "Some colors will be repeated", nRepeats, "times."))
+    extColorSeq = colorSeq
+    for (rep in 1:nRepeats) extColorSeq = c(extColorSeq, paste(colorSeq, ".", rep, sep = ""))
+  }
+  else {
+    nRepeats = 1
+    extColorSeq = colorSeq
+  }
+  colors = rep("grey", length(nLabels))
+  fin = !is.na(nLabels)
+  colors[!fin] = naColor
+  finLabels = nLabels[fin]
+  colors[fin][finLabels != 0] = extColorSeq[finLabels[finLabels != 0]]
+  if (!is.null(dim(labels))) 
+    dim(colors) = dim(labels)
+  colors
+}