diff --git a/.Rhistory b/.Rhistory
new file mode 100644
index 0000000..58aa9db
--- /dev/null
+++ b/.Rhistory
@@ -0,0 +1,512 @@
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.breaks[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.breaks[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+#g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.breaks <- g.hist$breaks # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.breaks[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+alpha.LM
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+#plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
+plot(yeast)
+hist(yeast)
+hist(g.vec)
+g.pois
+g.mean
+alpha.LM
+alpha.ML
+degree(g)
+sort(degree(g))
+sort(degree(g),decreasing=FALSE)
+sort(degree(g),decreasing=F)
+sort(degree(g),decreasing=false)
+sort(degree(g), decreasing = TRUE)
+head(sort(degree(g), decreasing = TRUE))
+stddev(degree(g))
+sd(degree(g))
+tail(sort(degree(g), decreasing = TRUE))
+plot(log(g.breaks.clean), log(g.probs.clean))
+# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
+# Degree Distribution
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+library(igraph)
+library(igraphdata)
+data(yeast)
+g <- yeast
+g.netname <- "Yeast"
+################# Set up Work #################
+g.vec <- degree(g)
+g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
+" Network"))
+legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
+"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
+"#006CD1", "#E66100", "#D35FB7"))
+g.mean <- mean(g.vec)
+g.seq <- 0:max(g.vec) # x-axis
+################# Guessing Alpha #################
+alpha.guess <- 1.5
+lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
+################# Poisson #################
+g.pois <- dpois(g.seq, g.mean, log=F)
+lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
+################# Linear model: Least-Squares Fit #################
+g.breaks <- g.hist$breaks[-c(1)] # remove 0
+g.probs <- g.hist$density[-1] # make lengths match
+# Need to clean up probabilities that are 0
+nz.probs.mask <- g.probs!=0
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+plot(log(g.breaks.clean), log(g.probs.clean))
+g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
+summary(g.fit)
+alpha.LM <- coef(g.fit)[2]
+lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
+################# Max-Log-Likelihood #################
+n <- length(g.breaks.clean)
+kmin <- g.breaks.clean[1]
+alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
+alpha.ML
+lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
+plot(log(g.breaks.clean), log(g.probs.clean))
+g.breaks.clean <- g.breaks[nz.probs.mask]
+g.probs.clean <- g.probs[nz.probs.mask]
+plot(log(g.breaks.clean), log(g.probs.clean))
+## Set Working Directory to file directory - RStudio approach
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+#### Part A: GenBank sequences and a multiple fasta file
+if (!require("ape")) install.packages("ape")
+library(ape) # needed for read.GenBank
+# fetch the mtDNA sequences
+mtDNA.MultiSeqs.list<-read.GenBank(c("AF011222","AF254446","X90314","AF089820",
+"AF176766","AF451972", "AY079510",
+"AF050738","AF176722","AF315498",
+"AF176731","AF451964"), as.character=TRUE)
+# look at species names
+mtDNA.Species<-attr(mtDNA.MultiSeqs.list,"species")
+# use species as name instead of genbank id
+names(mtDNA.MultiSeqs.list)<-mtDNA.Species
+names(mtDNA.MultiSeqs.list)
+# need to fix some names
+names(mtDNA.MultiSeqs.list)[1] <- paste("German_Neanderthal",sep="")
+names(mtDNA.MultiSeqs.list)[2] <- paste("Russian_Neanderthal",sep="")
+names(mtDNA.MultiSeqs.list)[3] <- paste("Human")
+names(mtDNA.MultiSeqs.list)[6] <- paste("Puti_Orangutan",sep="")
+names(mtDNA.MultiSeqs.list)[12] <- paste("Jari_Orangutan",sep="")
+names(mtDNA.MultiSeqs.list)
+# look at one of the sequences using $
+mtDNA.MultiSeqs.list$Human
+length(mtDNA.MultiSeqs.list$Human)
+## Convert to Biostrings object for the sequences
+if (!require("BiocManager")) install.packages("BiocManager")
+library(BiocManager)
+if (!require("Biostrings")) BiocManager::install("Biostrings")
+library(Biostrings)
+# loop through the list to create vector of strings for Biostrings input
+Names.vec <- c() # initialize speices names string vector
+Seqs.vec <- c()  # initialize sequence string vector
+for (mtDNA.name in names(mtDNA.MultiSeqs.list))
+{
+Names.vec <- c(Names.vec,mtDNA.name) # concatenate vector
+Seqs.vec <-c(Seqs.vec,paste(mtDNA.MultiSeqs.list[[mtDNA.name]],collapse=""))
+}
+mtDNA.multSeqs.bstr <- DNAStringSet(Seqs.vec) # convert to Biostring
+# name the Biostring sequences and compute stats
+names(mtDNA.multSeqs.bstr) <- Names.vec # count nucs and sequence lengths
+num.nts <- alphabetFrequency(mtDNA.multSeqs.bstr)[,1:4]
+mtDNA.lengths <- rowSums(num.nts)
+proportion.nts <- num.nts/mtDNA.lengths
+num.nts
+names(mtDA.multSeqs.bstr)
+names(mtDNA.multSeqs.bstr)
+mtDNA.multSeqs.bstr
+mtDNA.multSeqs.bstr
+mtDNA.multSeqs.bstr
+mtDNA.multSeqs.bstr --help
+print(mtDNA.multSeqs.bstr)
+print(mtDNA.multSeqs.bstr, -n40)
+print(mtDNA.multSeqs.bstr, -n 40)
+print(mtDNA.multSeqs.bstr, n=20)
+class(mtDNA.multSeqs.bstr)
+print(mtDNA.multSeqs.bstr)
+?print()
+?print
+table(mtDNA.multSeqs.bstr)
+mtDNA.multSeqs.bstr
+mtDNA.multSeqs.bstr$width
+mtDNA.multSeqs.bstr[,1]$width
+mtDNA.multSeqs.bstr[1,]$width
+mtDNA.multSeqs.bstr[1]$width
+mtDNA.multSeqs.bstr[1]
+mtDNA.multSeqs.bstr[1]$seq
+mtDNA.multSeqs.bstr[1]$width
+mtDNA.multSeqs.bstr[1]$names
+mtDNA.multSeqs.bstr$names
+# name the Biostring sequences and compute stats
+names(mtDNA.multSeqs.bstr) <- Names.vec # count nucs and sequence lengths
+mtDNA.multSeqs.bstr$names
+mtDNA.multSeqs.bstr$Names
+mtDNA.lengths
+table(mtDNA.lengths, Names.vec)
+cbind(mtDNA.lengths, Names.vec)
+sort(cbind(mtDNA.lengths, Names.vec))
+cbind(mtDNA.lengths, Names.vec)
+cbind(mtDNA.lengths, Names.vec)
+table(cbind(mtDNA.lengths, Names.vec))
+rbind(cbind(mtDNA.lengths, Names.vec))
+sort(rbind(cbind(mtDNA.lengths, Names.vec)))
+rbind(mtDNA.lengths, Names.vec)
+cbind(mtDNA.lengths, Names.vec)
+max(cbind(mtDNA.lengths, Names.vec))
+max(cbind(mtDNA.lengths, Names.vec))[,1]
+max(cbind(mtDNA.lengths, Names.vec)[,1])
+max(cbind(mtDNA.lengths, Names.vec)[1,])
+max(cbind(mtDNA.lengths, Names.vec)[,1])
+max(cbind(mtDNA.lengths, Names.vec))
+cbind(mtDNA.lengths, Names.vec)
+nlengthnames <- cbind(mtDNA.lengths, Names.vec)
+max(nlengthnames[,1])
+nlengthnames <- cbind(mtDNA.lengths, Names.vec)
+nlengthnames[which.max(nlengthnames[,1])]
+idx <- which.max(nlengthnames[,1])
+idx
+nlengthnames[idx, idx]
+nlengthnames[idx]
+nlengthnames
+nlengthnames[idx,]
+proportion.nts <- num.nts/mtDNA.lengths
+proportion.nts
diff --git a/MSA.png b/MSA.png
new file mode 100644
index 0000000..f7daf77
Binary files /dev/null and b/MSA.png differ
diff --git a/Schrick-Noah_CS-6643_Lab-10.R b/Schrick-Noah_CS-6643_Lab-10.R
index 44ea929..5e036c9 100644
--- a/Schrick-Noah_CS-6643_Lab-10.R
+++ b/Schrick-Noah_CS-6643_Lab-10.R
@@ -48,7 +48,7 @@ mtDNA.multSeqs.bstr <- DNAStringSet(Seqs.vec) # convert to Biostring
 
 # name the Biostring sequences and compute stats
 names(mtDNA.multSeqs.bstr) <- Names.vec # count nucs and sequence lengths
-# num.nts <- alphabetFrequency(mtDNA.multSeqs.bstr)[,1:4]
+num.nts <- alphabetFrequency(mtDNA.multSeqs.bstr)[,1:4]
 mtDNA.lengths <- rowSums(num.nts)
 proportion.nts <- num.nts/mtDNA.lengths
 
@@ -56,3 +56,40 @@ proportion.nts <- num.nts/mtDNA.lengths
 nlengthnames <- cbind(mtDNA.lengths, Names.vec)
 idx <- which.max(nlengthnames[,1])
 nlengthnames[idx,]
+
+
+#### Part B: Multiple Sequence Alignment
+if (!require("BiocManager")) install.packages("BiocManager")
+library(BiocManager)
+if (!require("msa")) BiocManager::install("msa")
+library(msa)
+mtDNA.msa <- msa(mtDNA.multSeqs.bstr,method="ClustalOmega")
+msaPrettyPrint(mtDNA.msa, file="mtDNA.pdf", output="pdf", showNames="left",
+               showLogo="none", askForOverwrite=FALSE, verbose=TRUE )
+## loop to make results data frame
+num_seqs <- length(Names.vec)
+# initialize data frame
+align.stats.df <- data.frame(species=rep(NA,num_seqs), seqlen=rep(0,num_seqs), 
+                             numgaps=rep(0,num_seqs), nt_a=rep(NA,num_seqs), 
+                             nt_c=rep(NA,num_seqs), nt_g=rep(NA,num_seqs), 
+                             nt_t=rep(NA,num_seqs))
+# DNAbin type required for dist.dna and helpful for other calculations
+mtDNA.msa.DNAbin <- as.DNAbin(mtDNA.msa) 
+for (i in 1:num_seqs){
+  seq_name <- Names.vec[i]
+  seq.vec <- as.character(mtDNA.msa.DNAbin[i,])
+  num.gaps <- sum(seq.vec=="-")
+  prop.nt.i <- proportion.nts[i,]
+  align.stats.df[i,] <- c(seq_name, mtDNA.lengths[i], num.gaps,
+                          round(prop.nt.i[1],digits=2), round(prop.nt.i[2],digits=2), 
+                          round(prop.nt.i[3],digits=2), round(prop.nt.i[4],digits=2))
+}
+
+# write to file
+write.table(align.stats.df,file="alignstats.tab",sep = "\t", row.names=FALSE, quote=FALSE)
+
+# you can use $ operator to grab a named column from a data.frame 
+# similar to grabbing a named variable from a list 
+align.stats.df$species
+align.stats.df$nt_a             # strings by default
+as.numeric(align.stats.df$nt_a) # convert to numeric
diff --git a/Schrick-Noah_CS-6643_Lab-10.docx b/Schrick-Noah_CS-6643_Lab-10.docx
index ec49494..b690bb1 100644
Binary files a/Schrick-Noah_CS-6643_Lab-10.docx and b/Schrick-Noah_CS-6643_Lab-10.docx differ
diff --git a/alignstats.tab b/alignstats.tab
new file mode 100644
index 0000000..26ea19f
--- /dev/null
+++ b/alignstats.tab
@@ -0,0 +1,13 @@
+species	seqlen	numgaps	nt_a	nt_c	nt_g	nt_t
+German_Neanderthal	379	81	0.31	0.32	0.13	0.24
+Russian_Neanderthal	345	76	0.33	0.34	0.11	0.22
+Human	360	76	0.33	0.34	0.11	0.22
+Gorilla_beringei_beringei	374	96	0.29	0.36	0.13	0.23
+Pan_troglodytes_troglodytes	340	105	0.32	0.36	0.1	0.22
+Puti_Orangutan	354	90	0.31	0.4	0.1	0.19
+Gorilla_gorilla_gorilla	367	71	0.3	0.35	0.14	0.21
+Gorilla_beringei_graueri	374	105	0.28	0.35	0.14	0.23
+Pan_troglodytes_schweinfurthii	339	110	0.34	0.35	0.09	0.22
+Pan_troglodytes_ellioti	411	111	0.33	0.36	0.1	0.21
+Pan_troglodytes_verus	339	39	0.34	0.37	0.1	0.2
+Jari_Orangutan	345	111	0.32	0.39	0.1	0.19
diff --git a/mtDNA.aux b/mtDNA.aux
new file mode 100644
index 0000000..e3b20cc
--- /dev/null
+++ b/mtDNA.aux
@@ -0,0 +1,14 @@
+\relax 
+\savedseqlength{1}{1}{369}
+\savedseqlength{1}{2}{374}
+\savedseqlength{1}{3}{374}
+\savedseqlength{1}{4}{354}
+\savedseqlength{1}{5}{345}
+\savedseqlength{1}{6}{360}
+\savedseqlength{1}{7}{379}
+\savedseqlength{1}{8}{345}
+\savedseqlength{1}{9}{340}
+\savedseqlength{1}{10}{339}
+\savedseqlength{1}{11}{411}
+\savedseqlength{1}{12}{339}
+\gdef \@abspage@last{2}
diff --git a/mtDNA.log b/mtDNA.log
new file mode 100644
index 0000000..e4b56d1
--- /dev/null
+++ b/mtDNA.log
@@ -0,0 +1,308 @@
+This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.11.8)  21 NOV 2022 16:54
+entering extended mode
+ restricted \write18 enabled.
+ %&-line parsing enabled.
+**mtDNA.tex
+(./mtDNA.tex
+LaTeX2e <2021-11-15> patch level 1
+L3 programming layer <2022-04-10>
+(/usr/share/texmf-dist/tex/latex/base/article.cls
+Document Class: article 2021/10/04 v1.4n Standard LaTeX document class
+(/usr/share/texmf-dist/tex/latex/base/size10.clo
+File: size10.clo 2021/10/04 v1.4n Standard LaTeX file (size option)
+)
+\c@part=\count185
+\c@section=\count186
+\c@subsection=\count187
+\c@subsubsection=\count188
+\c@paragraph=\count189
+\c@subparagraph=\count190
+\c@figure=\count191
+\c@table=\count192
+\abovecaptionskip=\skip47
+\belowcaptionskip=\skip48
+\bibindent=\dimen138
+)
+(/usr/lib/R/library/msa/tex/texshade.sty
+Package: texshade 2021/04/01 LaTeX TeXshade (v1.26)
+
+Package `texshade', Version 1.26 of 2021/04/01.
+(/usr/share/texmf-dist/tex/latex/graphics/color.sty
+Package: color 2021/12/07 v1.3c Standard LaTeX Color (DPC)
+
+(/usr/share/texmf-dist/tex/latex/graphics-cfg/color.cfg
+File: color.cfg 2016/01/02 v1.6 sample color configuration
+)
+Package color Info: Driver file: dvips.def on input line 149.
+
+(/usr/share/texmf-dist/tex/latex/graphics-def/dvips.def
+File: dvips.def 2017/06/20 v3.1d Graphics/color driver for dvips
+)
+(/usr/share/texmf-dist/tex/latex/graphics/dvipsnam.def
+File: dvipsnam.def 2016/06/17 v3.0m Driver-dependent file (DPC,SPQR)
+))
+(/usr/share/texmf-dist/tex/latex/graphics/graphics.sty
+Package: graphics 2021/03/04 v1.4d Standard LaTeX Graphics (DPC,SPQR)
+
+(/usr/share/texmf-dist/tex/latex/graphics/trig.sty
+Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
+)
+(/usr/share/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
+File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
+)
+Package graphics Info: Driver file: pdftex.def on input line 107.
+
+(/usr/share/texmf-dist/tex/latex/graphics-def/pdftex.def
+File: pdftex.def 2020/10/05 v1.2a Graphics/color driver for pdftex
+))
+\structurefile=\read2
+\featurefile=\write3
+\alignfile=\read3
+\sublogofile=\read4
+\exp@rtfile=\write4
+\exp@rt@chimerafile=\write5
+
+(/usr/share/texmf-dist/tex/latex/amsfonts/amssymb.sty
+Package: amssymb 2013/01/14 v3.01 AMS font symbols
+
+(/usr/share/texmf-dist/tex/latex/amsfonts/amsfonts.sty
+Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
+\@emptytoks=\toks16
+\symAMSa=\mathgroup4
+\symAMSb=\mathgroup5
+LaTeX Font Info:    Redeclaring math symbol \hbar on input line 98.
+LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
+(Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
+))
+\symalphahelix=\mathgroup6
+\loopcount=\count193
+\innerloopcount=\count194
+\outerloopcount=\count195
+\seq@count=\count196
+\killseq@count=\count197
+\seq@percent=\count198
+\res@count=\count199
+\seq@pointer=\count266
+\pos@count=\count267
+\res@perline=\count268
+\end@count=\count269
+\cons@count=\count270
+\total@count=\count271
+\temp@count=\count272
+\triple@count=\count273
+\temp@@count=\count274
+\pos@sum=\count275
+\box@width=\skip49
+\name@width=\skip50
+\box@depth=\skip51
+\width@tmp=\skip52
+\box@height=\skip53
+\number@width=\skip54
+\line@stretch=\skip55
+\center@fill=\skip56
+\arrow@width=\skip57
+\arrow@height=\skip58
+\rule@thick=\skip59
+\arrow@thick=\skip60
+\logo@height=\skip61
+\equal@width=\skip62
+\equal@tmp=\skip63
+\equal@height=\skip64
+\temp@@length=\skip65
+\vspace@legend=\skip66
+\hspace@legend=\skip67
+\bar@length=\skip68
+Package color Info: Redefining color LightGray on input line 1716.
+Package color Info: Redefining color LightLightGray on input line 1817.
+Package color Info: Redefining color LightLightLightGray on input line 1919.
+)
+(/usr/share/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
+File: l3backend-pdftex.def 2022-04-14 L3 backend support: PDF output (pdfTeX)
+\l__color_backend_stack_int=\count276
+\l__pdf_internal_box=\box50
+)
+No file mtDNA.aux.
+\openout1 = `mtDNA.aux'.
+
+LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 24.
+LaTeX Font Info:    ... okay on input line 24.
+(/usr/share/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
+[Loading MPS to PDF converter (version 2006.09.02).]
+\scratchcounter=\count277
+\scratchdimen=\dimen139
+\scratchbox=\box51
+\nofMPsegments=\count278
+\nofMParguments=\count279
+\everyMPshowfont=\toks17
+\MPscratchCnt=\count280
+\MPscratchDim=\dimen140
+\MPnumerator=\count281
+\makeMPintoPDFobject=\count282
+\everyMPtoPDFconversion=\toks18
+) (/usr/share/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
+Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
+Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
+85.
+
+(/usr/share/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
+File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
+e
+))
+(/tmp/Rtmp9tSm8e/seq57d10064df.fasta:
+Runaway argument?
+! Paragraph ended before \inf@@get was complete.
+<to be read again> 
+                   \par 
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I suspect you've forgotten a `}', causing me to apply this
+control sequence to too much text. How can we recover?
+My plan is to forget the whole thing and hope for the best.
+
+! Misplaced alignment tab character &.
+\msfline ->\par &
+                  & & & @
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I can't figure out why you would want to use a tab mark
+here. If you just want an ampersand, the remedy is
+simple: Just type `I\&' now. But if some right brace
+up above has ended a previous alignment prematurely,
+you're probably due for more error messages, and you
+might try typing `S' now just to see what is salvageable.
+
+! Misplaced alignment tab character &.
+\msfline ->\par & &
+                    & & @
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I can't figure out why you would want to use a tab mark
+here. If you just want an ampersand, the remedy is
+simple: Just type `I\&' now. But if some right brace
+up above has ended a previous alignment prematurely,
+you're probably due for more error messages, and you
+might try typing `S' now just to see what is salvageable.
+
+! Misplaced alignment tab character &.
+\msfline ->\par & & &
+                      & @
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I can't figure out why you would want to use a tab mark
+here. If you just want an ampersand, the remedy is
+simple: Just type `I\&' now. But if some right brace
+up above has ended a previous alignment prematurely,
+you're probably due for more error messages, and you
+might try typing `S' now just to see what is salvageable.
+
+! Misplaced alignment tab character &.
+\msfline ->\par & & & &
+                        @
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I can't figure out why you would want to use a tab mark
+here. If you just want an ampersand, the remedy is
+simple: Just type `I\&' now. But if some right brace
+up above has ended a previous alignment prematurely,
+you're probably due for more error messages, and you
+might try typing `S' now just to see what is salvageable.
+
+Runaway argument?
+! Paragraph ended before \check@letter was complete.
+<to be read again> 
+                   \par 
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I suspect you've forgotten a `}', causing me to apply this
+control sequence to too much text. How can we recover?
+My plan is to forget the whole thing and hope for the best.
+
+Runaway argument?
+! Paragraph ended before \firstchar@get was complete.
+<to be read again> 
+                   \par 
+l.25 ...hade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+                                                  
+I suspect you've forgotten a `}', causing me to apply this
+control sequence to too much text. How can we recover?
+My plan is to forget the whole thing and hope for the best.
+
+Runaway argument?
+! Paragraph ended before \firstchar@get was complete.
+<to be read again> 
+                   \par 
+l.47 \end{texshade}
+                   
+I suspect you've forgotten a `}', causing me to apply this
+control sequence to too much text. How can we recover?
+My plan is to forget the whole thing and hope for the best.
+
+Runaway argument?
+! Paragraph ended before \res@get was complete.
+<to be read again> 
+                   \par 
+l.47 \end{texshade}
+                   
+I suspect you've forgotten a `}', causing me to apply this
+control sequence to too much text. How can we recover?
+My plan is to forget the whole thing and hope for the best.
+
+! Misplaced alignment tab character &.
+\seq@line ->\par &
+                  @
+l.47 \end{texshade}
+                   
+I can't figure out why you would want to use a tab mark
+here. If you just want an ampersand, the remedy is
+simple: Just type `I\&' now. But if some right brace
+up above has ended a previous alignment prematurely,
+you're probably due for more error messages, and you
+might try typing `S' now just to see what is salvageable.
+
+. . . . . [1
+Non-PDF special ignored!
+<special> papersize=794.96999pt,614.295pt
+
+{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] )
+LaTeX Font Info:    Trying to load font information for U+msa on input line 47.
+
+
+(/usr/share/texmf-dist/tex/latex/amsfonts/umsa.fd
+File: umsa.fd 2013/01/14 v3.01 AMS symbols A
+)
+LaTeX Font Info:    Trying to load font information for U+msb on input line 47.
+
+
+(/usr/share/texmf-dist/tex/latex/amsfonts/umsb.fd
+File: umsb.fd 2013/01/14 v3.01 AMS symbols B
+) [2] (./mtDNA.aux) ) 
+Here is how much of TeX's memory you used:
+ 8179 strings out of 478238
+ 97095 string characters out of 5850456
+ 1334999 words of memory out of 5000000
+ 26433 multiletter control sequences out of 15000+600000
+ 471599 words of font info for 39 fonts, out of 8000000 for 9000
+ 1141 hyphenation exceptions out of 8191
+ 541i,6n,65p,182b,4214s stack positions out of 5000i,500n,10000p,200000b,80000s
+</usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr1
+0.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/sh
+are/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb>
+Output written on mtDNA.pdf (2 pages, 86917 bytes).
+PDF statistics:
+ 26 PDF objects out of 1000 (max. 8388607)
+ 15 compressed objects within 1 object stream
+ 0 named destinations out of 1000 (max. 500000)
+ 1 words of extra memory for PDF output out of 10000 (max. 10000000)
+
diff --git a/mtDNA.pdf b/mtDNA.pdf
new file mode 100644
index 0000000..c29f725
Binary files /dev/null and b/mtDNA.pdf differ
diff --git a/mtDNA.tex b/mtDNA.tex
new file mode 100644
index 0000000..22be093
--- /dev/null
+++ b/mtDNA.tex
@@ -0,0 +1,48 @@
+\documentclass[10pt]{article}
+
+\usepackage{texshade}
+
+\headheight=0pt
+\headsep=0pt
+\hoffset=0pt
+\voffset=0pt
+\paperwidth=11in
+\paperheight=8.5in
+\ifx\pdfoutput\undefined
+\relax
+\else
+\pdfpagewidth=\paperwidth
+\pdfpageheight=\paperheight
+\fi
+\oddsidemargin=-0.9in
+\topmargin=-0.7in
+\textwidth=10.8in
+\textheight=7.9in
+
+\pagestyle{empty}
+
+\begin{document}
+\begin{texshade}{/tmp/Rtmp9tSm8e/seq57d10064df.fasta}
+\seqtype{N}
+\shadingmode{identical}
+\threshold{50}
+\showconsensus[ColdHot]{bottom}
+\shadingcolors{blues}
+\hidelogoscale
+\shownames{left}
+\nameseq{1}{Gorilla gorilla gorilla}
+\nameseq{2}{Gorilla beringei beringei}
+\nameseq{3}{Gorilla beringei graueri}
+\nameseq{4}{Puti Orangutan}
+\nameseq{5}{Jari Orangutan}
+\nameseq{6}{Human}
+\nameseq{7}{German Neanderthal}
+\nameseq{8}{Russian Neanderthal}
+\nameseq{9}{Pan troglodytes troglodytes}
+\nameseq{10}{Pan troglodytes schweinfurthii}
+\nameseq{11}{Pan troglodytes ellioti}
+\nameseq{12}{Pan troglodytes verus}
+\shownumbering{right}
+\showlegend
+\end{texshade}
+\end{document}