diff --git a/.Rhistory b/.Rhistory index d931055..d5f7a57 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,444 +1,3 @@ -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1,2)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -#g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.breaks <- g.hist$breaks # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -alpha.LM -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3) -plot(yeast) -hist(yeast) -hist(g.vec) -g.pois -g.mean -alpha.LM -alpha.ML -degree(g) -sort(degree(g)) -sort(degree(g),decreasing=FALSE) -sort(degree(g),decreasing=F) -sort(degree(g),decreasing=false) -sort(degree(g), decreasing = TRUE) -head(sort(degree(g), decreasing = TRUE)) -stddev(degree(g)) -sd(degree(g)) -tail(sort(degree(g), decreasing = TRUE)) -plot(log(g.breaks.clean), log(g.probs.clean)) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match @@ -489,24 +48,465 @@ S[i,j]<- mismatch_score } } } -len(S) -size(S) -nrows(S) -nrow(S) -col(S) -S -S[A][T] -S[A,T] -S -S[A] -S.A -S.at(A) -S[1.1] +#### Part B: Alignment Score Matrix (F) and Traceback Matrix (T) +x <- unlist(strsplit(x_str, "")) +y <- unlist(strsplit(y_str, "")) +x.len <- length(x) +y.len <- length(y) +Fmat<-matrix(0,nrow=x.len+1,ncol=y.len+1) +Tmat<-Fmat # 0's to start +rownames(Fmat)<-c("-",x); colnames(Fmat)<-c("-",y) +rownames(Tmat)<-c("-",x); colnames(Tmat)<-c("-",y) +# create first row and column +Fmat[,1]<- seq(from=0,len=x.len+1,by=-abs(gap_penalty)) +Fmat[1,]<- seq(from=0,len=y.len+1,by=-abs(gap_penalty)) +Tmat[,1]<- rep(2,x.len+1) # 2 means align with a gap in the upper seq +Tmat[1,]<- rep(3,y.len+1) # 3 means align with a gap in the side seq +x +T +Tmat +Fmat +#### Part C: Building Fmat and Tmat +my.numbers <- c(7,9,-4) +max(my.numbers) +which.max(my.numbers) +#### Part C: Building Fmat and Tmat +my.numbers <- c(9,9,-4) +which.max(my.numbers) S[1,1] -S["A", "T"] -dna.letters("A") -dna.letters -?index() -match("A", dna.letters) -match("T", dna.letters) -S[1,4] +S +S[1,2] +rowname(Fmat[i]) +(Fmat[1]) +(Fmat[2]) +(Fmat[2,]) +rownames(Fmat) +rownames(Fmat[1]) +rownames(Fmat[2]) +rownames(Fmat[2,1]) +rownames(Fmat)[1] +rownames(Fmat)[2] +S[rownames(Fmat)[2], colnames(Fmat)[2]) +S[rownames(Fmat)[2], colnames(Fmat)[2]] +#### Part C: Building Fmat and Tmat +for (i in 2:nrow(Fmat)){ +for (j in 2:ncol(Fmat)){ # use F recursive rules +test_three_cases <- c(Fmat[i-1, j-1] + S[rownames(Fmat)[i], colnames(Fmat)[j]], # 1 mis/match +# Fmat[i-1, j] + gap_penalty, # 2 up-gap +Fmat[i, j-1] + gap_penalty) # 3 left-gap +Fmat[i,j]=max(test_three_cases) +Tmat[i,j]=which.max(test_three_cases) +} +} +final_score <- Fmat[nrow(Fmat),ncol(Fmat)] +Fmat +final_score +#### Part C: Building Fmat and Tmat +for (i in 2:nrow(Fmat)){ +for (j in 2:ncol(Fmat)){ # use F recursive rules +test_three_cases <- c(Fmat[i-1, j-1] + S[rownames(Fmat)[i], colnames(Fmat)[j]], # 1 mis/match +Fmat[i-1, j] + gap_penalty, # 2 up-gap +Fmat[i, j-1] + gap_penalty) # 3 left-gap +Fmat[i,j]=max(test_three_cases) +Tmat[i,j]=which.max(test_three_cases) +} +} +final_score <- Fmat[nrow(Fmat),ncol(Fmat)] +Fmat +gap_penalty +Fmat[1,5] +Fmat[1,6] +Tmat +final_score +## Aligning from Tmat +m <- nrow(Fmat) +n <- ncol(Fmat) +top_seq <- list() +side_seq <- list() +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq.append(rownames(Tmat)[m]) +side_seq.append("-") +m-- +} else if (Tmat[m,n] == 2){ +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq.append(rownames(Tmat)[m]) +side_seq.append("-") +m-- +} +else if (Tmat[m,n] == 2){ +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq.append(rownames(Tmat)[m]) +side_seq.append("-") +m-- +} +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq.append(rownames(Tmat)[m]) +side_seq.append("-") +m-- +} else if (Tmat[m,n] == 2){ +n <- n-1 +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq.append(rownames(Tmat)[m]) +side_seq.append("-") +m <- m-1 +} else if (Tmat[m,n] == 2){ +top_seq.append("-") +side_seq.append(colnames(Tmat)[n]) +n <- n-1 +} else{ +top_seq.append(rownames(Tmat)[m]) +side_seq.append(colnames(Tmat)[n]) +m <- m-1 +n <- n-1 +} +} +while (m>0 && n>0){ +if (Tmat[m,n] == 3){ +top_seq <- append(top_seq, rownames(Tmat)[m]) +side_seq <- append(side_seq, "-") +m <- m-1 +} else if (Tmat[m,n] == 2){ +top_seq <- append(top_seq, "-") +side_seq <- append(side_seq, colnames(Tmat)[n]) +n <- n-1 +} else{ +top_seq <- append(top_seq, rownames(Tmat)[m]) +side_seq <- append(side_seq, colnames(Tmat)[n]) +m <- m-1 +n <- n-1 +} +} +top_seq +side_seq +top_seq[1] +paste(top_seq, collapse=',') +paste(top_seq, collapse=',') +paste(side_seq, collapse=',') +paste(rev(top_seq), collapse=',') +paste(rev(side_seq), collapse=',') +## Aligning from Tmat +m <- nrow(Fmat) +n <- ncol(Fmat) +m +Tmat[5,5] +rownames(Tmat)[5] +colnames(Tmat)[5] +n +rownames(Tmat)[5,7] +rownames(Tmat)[7] +colnames(Tmat)[7] +paste(rev(top_seq), collapse=',') +paste(rev(side_seq), collapse=',') +side_seq <- list() +top_seq <- list() +top_seq <- append(top_seq, rownames(Tmat)[m]) +side_seq <- append(side_seq, colnames(Tmat)[n]) +paste(rev(top_seq), collapse=',') +paste(rev(side_seq), collapse=',') +m <- m-1 +n <- n-1 +top_seq <- append(top_seq, rownames(Tmat)[m]) +side_seq <- append(side_seq, colnames(Tmat)[n]) +m <- m-1 +n <- n-1 +paste(rev(top_seq), collapse=',') +paste(rev(side_seq), collapse=',') +top_seq <- append(top_seq, rownames(Tmat)[m]) +side_seq <- append(side_seq, colnames(Tmat)[n]) +m <- m-1 +n <- n-1 +paste(rev(top_seq), collapse=',') +paste(rev(side_seq), collapse=',') +?rbind +curr_align_col <- rbind(x[n-1],y[m-1]) +curr_align_col +## Aligning from Tmat +m <- nrow(Tmat) +n <- ncol(Tmat) +seq_align <- character() +while ((n+m) != 2){ +if (Tmat[m,n] == 3){ +curr_align_col <- rbind("-",y[m-1]) +alignment <- cbind(curr_align_col,alignment) +m <- m-1 +} else if (Tmat[m,n] == 2){ +curr_align_col <- rbind(x[n-1],"-") +alignment <- cbind(curr_align_col,alignment) +n <- n-1 +} else{ +curr_align_col <- rbind(x[n-1], y[m-1]) +alignment <- cbind(curr_align_col, alignment) +m <- m-1 +n <- n-1 +} +} +seq_align <- character() +while ((n+m) != 2){ +if (Tmat[m,n] == 3){ +curr_align_col <- rbind("-",y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +m <- m-1 +} else if (Tmat[m,n] == 2){ +curr_align_col <- rbind(x[n-1],"-") +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +} else{ +curr_align_col <- rbind(x[n-1], y[m-1]) +seq_align <- cbind(curr_align_col, seq_align) +m <- m-1 +n <- n-1 +} +} +?cbind +## Aligning from Tmat +n <- nrow(Tmat) +m <- ncol(Tmat) +seq_align <- character() +while ((n+m) != 2){ +if (Tmat[m,n] == 3){ +curr_align_col <- rbind("-",y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +m <- m-1 +} else if (Tmat[m,n] == 2){ +curr_align_col <- rbind(x[n-1],"-") +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +} else{ +curr_align_col <- rbind(x[n-1], y[m-1]) +seq_align <- cbind(curr_align_col, seq_align) +m <- m-1 +n <- n-1 +} +} +## Aligning from Tmat +m <- nrow(Tmat) +n <- ncol(Tmat) +seq_align <- character() +while ((n+m) != 2){ +if (Tmat[m,n] == 3){ +curr_align_col <- rbind("-",y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +m <- m-1 +} else if (Tmat[m,n] == 2){ +curr_align_col <- rbind(x[n-1],"-") +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +} else{ +curr_align_col <- rbind(x[n-1], y[m-1]) +seq_align <- cbind(curr_align_col, seq_align) +m <- m-1 +n <- n-1 +} +} +seq_align +curr_align_col +x +y +n +m +## Aligning from Tmat +n<-nrow(Tmat) # start at bottom right of Tmat +m<-ncol(Tmat) +alignment<-character() +while( (n+m)!=2 ){ +if (Tmat[n,m]==1){ +# subtract 1 from x and y indices because they are +# one row/col smaller than Tmat +curr_align_col <- rbind(x[n-1],y[m-1]) +alignment <- cbind(curr_align_col,alignment) +n=n-1; m=m-1; # move back diagonally +}else if(Tmat[n,m]==2){ +curr_align_col <- rbind(x[n-1],"-") # put gap in top seq +alignment <- cbind(curr_align_col,alignment) +n=n-1 # move up +}else{ +curr_align_col <- rbind("-",y[m-1]) # put gap in side seq +alignment <- cbind(curr_align_col,alignment) +m=m-1 # move left +} +} # end while +alignment +## Aligning from Tmat +n <- nrow(Tmat) +m <- ncol(Tmat) +seq_align <- character() +while( (n+m)!=2 ){ +if (Tmat[n,m]==1){ +curr_align_col <- rbind(x[n-1],y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +m <- m-1 +}else if(Tmat[n,m]==2){ +curr_align_col <- rbind(x[n-1],"-") # put gap in top seq +seq_align <- cbind(curr_align_col,seq_align) +n=n-1 # move up +}else{ +curr_align_col <- rbind("-",y[m-1]) # put gap in side seq +seq_align <- cbind(curr_align_col,seq_align) +m=m-1 # move left +} +} # end while +alignment +## Aligning from Tmat +n <- nrow(Tmat) +m <- ncol(Tmat) +seq_align <- character() +while( (n+m)!=2 ){ +if (Tmat[n,m]==1){ +curr_align_col <- rbind(x[n-1],y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +m <- m-1 +}else if(Tmat[n,m]==2){ +curr_align_col <- rbind(x[n-1],"-") +seq_align <- cbind(curr_align_col,seq_align) +n <- n-1 +}else{ +curr_align_col <- rbind("-",y[m-1]) +seq_align <- cbind(curr_align_col,seq_align) +m <- m-1 +} +} # end while +alignment +seq_align +#### Part D: Convert to functions +make.alignment.matrices <- function(x_str, y_str, match_score, mismatch_score, +gap_penalty){ +## Substitution Matrix +dna.letters<-c("A","C","G","T") +num.letters <- length(dna.letters) +S<-data.frame(matrix(0,nrow=num.letters,ncol=num.letters)) # data frame +rownames(S)<-dna.letters; colnames(S)<-dna.letters +for (i in 1:4){ +for (j in 1:4){ +if(dna.letters[i]==dna.letters[j]){ +S[i,j]<- match_score +} +else{ +S[i,j]<- mismatch_score +} +} +} +## F Matrix and T Matrix +x <- unlist(strsplit(x_str, "")) +y <- unlist(strsplit(y_str, "")) +x.len <- length(x) +y.len <- length(y) +Fmat<-matrix(0,nrow=x.len+1,ncol=y.len+1) +Tmat<-Fmat # 0's to start +rownames(Fmat)<-c("-",x); colnames(Fmat)<-c("-",y) +rownames(Tmat)<-c("-",x); colnames(Tmat)<-c("-",y) +# create first row and column +Fmat[,1]<- seq(from=0,len=x.len+1,by=-abs(gap_penalty)) +Fmat[1,]<- seq(from=0,len=y.len+1,by=-abs(gap_penalty)) +Tmat[,1]<- rep(2,x.len+1) # 2 means align with a gap in the upper seq +Tmat[1,]<- rep(3,y.len+1) # 3 means align with a gap in the side seq +## Building Fmat and Tmat +for (i in 2:nrow(Fmat)){ +for (j in 2:ncol(Fmat)){ # use F recursive rules +test_three_cases <- c(Fmat[i-1, j-1] + S[rownames(Fmat)[i], colnames(Fmat)[j]], # 1 mis/match +Fmat[i-1, j] + gap_penalty, # 2 up-gap +Fmat[i, j-1] + gap_penalty) # 3 left-gap +Fmat[i,j]=max(test_three_cases) +Tmat[i,j]=which.max(test_three_cases) +} +} +final_score <- Fmat[nrow(Fmat),ncol(Fmat)] +return(list(Fmat=Fmat, Tmat=Tmat, score_out=final_score)) +} +# load new input +x_str2 <- "GATTA" # side sequence +y_str2 <- "GAATTC" # top sequence +match_score <- 2 +mismatch_score <- -1 +gap_penalty <- -2 +align.list2 <- make.alignment.matrices(x_str2, y_str2, match_score, +mismatch_score, gap_penalty) +align.list2$Fmat +align.list2$Tmat +align.list2$score_out +if (!require("gplots")) install.packages("gplots") +library(gplots) +Fmat2 <- align.list2$Fmat +col = c("black","blue","red","yellow","green") +breaks = seq(min(Fmat2),max(Fmat2),len=length(col)+1) +heatmap.2(Fmat2[-1,-1], dendrogram='none', density.info="none", +Rowv=FALSE, Colv=FALSE, trace='none', +breaks = breaks, col = col, +sepwidth=c(0.01,0.01), +sepcolor="black", +colsep=1:ncol(Fmat2), +rowsep=1:nrow(Fmat2)) +#### Part E: Traceback Matrix +show.alignment <- function(x_str,y_str,Tmat){ +################ create the alignment +# input Tmat and the two sequences: x side seq and y is top seq +# make character vectors out of the strings +x<-unlist(strsplit(x_str,"")) +y<-unlist(strsplit(y_str,"")) +n<-nrow(Tmat) # start at bottom right of Tmat +m<-ncol(Tmat) +alignment<-character() +while( (n+m)!=2 ){ +if (Tmat[n,m]==1){ +# subtract 1 from x and y indices because they are +# one row/col smaller than Tmat +curr_align_col <- rbind(x[n-1],y[m-1]) +alignment <- cbind(curr_align_col,alignment) +n=n-1; m=m-1; # move back diagonally +}else if(Tmat[n,m]==2){ +curr_align_col <- rbind(x[n-1],"-") # put gap in top seq +alignment <- cbind(curr_align_col,alignment) +n=n-1 # move up +}else{ +curr_align_col <- rbind("-",y[m-1]) # put gap in side seq +alignment <- cbind(curr_align_col,alignment) +m=m-1 # move left +} +} # end while +return(alignment) +} # end function +alignment2 <- show.alignment(x_str2,y_str2,align.list2$Tmat) +alignment2 +write.table(alignment2,row.names=F,col.names=F,quote=F) +## Input 3 +x_str3 <- "ATCGT" # side sequence +y_str3 <- "TGGTG" # top sequence +match_score <- 1 +mismatch_score <- -2 +gap_penalty <- -1 +align.list3 <- make.alignment.matrices(x_str3, y_str3, match_score, +mismatch_score, gap_penalty) +align.list3$Fmat +align.list3$Tmat +align.list3$score_out +col = c("black","blue","red","yellow","green") +breaks = seq(min(Fmat3),max(Fmat3),len=length(col)+1) +heatmap.2(Fmat3[-1,-1], dendrogram='none', density.info="none", +Rowv=FALSE, Colv=FALSE, trace='none', +breaks = breaks, col = col, +sepwidth=c(0.01,0.01), +sepcolor="black", +colsep=1:ncol(Fmat3), +rowsep=1:nrow(Fmat3)) +Fmat3 <- align.list3$Fmat +align.list3$Fmat +Fmat3 <- align.list3$Fmat +align.list3$Tmat +align.list3$score_out +o +heatmap.2(Fmat3[-1,-1], dendrogram='none', density.info="none", +Rowv=FALSE, Colv=FALSE, trace='none', +breaks = breaks, col = col, +sepwidth=c(0.01,0.01), +sepcolor="black", +colsep=1:ncol(Fmat3), +rowsep=1:nrow(Fmat3)) +alignment3 <- show.alignment(x_str3,y_str3,align.list3$Tmat) +alignment3 +write.table(alignment3,row.names=F,col.names=F,quote=F) diff --git a/Schrick-Noah_CS-6643_Lab-9.R b/Schrick-Noah_CS-6643_Lab-9.R index cfb6b89..2d97a34 100644 --- a/Schrick-Noah_CS-6643_Lab-9.R +++ b/Schrick-Noah_CS-6643_Lab-9.R @@ -163,3 +163,67 @@ heatmap.2(Fmat2[-1,-1], dendrogram='none', density.info="none", sepcolor="black", colsep=1:ncol(Fmat2), rowsep=1:nrow(Fmat2)) + +#### Part E: Traceback Matrix +show.alignment <- function(x_str,y_str,Tmat){ + ################ create the alignment + # input Tmat and the two sequences: x side seq and y is top seq + # make character vectors out of the strings + x<-unlist(strsplit(x_str,"")) + y<-unlist(strsplit(y_str,"")) + + n<-nrow(Tmat) # start at bottom right of Tmat + m<-ncol(Tmat) + alignment<-character() + while( (n+m)!=2 ){ + if (Tmat[n,m]==1){ + # subtract 1 from x and y indices because they are + # one row/col smaller than Tmat + curr_align_col <- rbind(x[n-1],y[m-1]) + alignment <- cbind(curr_align_col,alignment) + n=n-1; m=m-1; # move back diagonally + }else if(Tmat[n,m]==2){ + curr_align_col <- rbind(x[n-1],"-") # put gap in top seq + alignment <- cbind(curr_align_col,alignment) + n=n-1 # move up + }else{ + curr_align_col <- rbind("-",y[m-1]) # put gap in side seq + alignment <- cbind(curr_align_col,alignment) + m=m-1 # move left + } + } # end while + return(alignment) +} # end function + +alignment2 <- show.alignment(x_str2,y_str2,align.list2$Tmat) +alignment2 +write.table(alignment2,row.names=F,col.names=F,quote=F) + +## Input 3 +x_str3 <- "ATCGT" # side sequence +y_str3 <- "TGGTG" # top sequence +match_score <- 1 +mismatch_score <- -2 +gap_penalty <- -1 + +align.list3 <- make.alignment.matrices(x_str3, y_str3, match_score, + mismatch_score, gap_penalty) +align.list3$Fmat +Fmat3 <- align.list3$Fmat +align.list3$Tmat +align.list3$score_out + +col = c("black","blue","red","yellow","green") +breaks = seq(min(Fmat3),max(Fmat3),len=length(col)+1) + +heatmap.2(Fmat3[-1,-1], dendrogram='none', density.info="none", + Rowv=FALSE, Colv=FALSE, trace='none', + breaks = breaks, col = col, + sepwidth=c(0.01,0.01), + sepcolor="black", + colsep=1:ncol(Fmat3), + rowsep=1:nrow(Fmat3)) + +alignment3 <- show.alignment(x_str3,y_str3,align.list3$Tmat) +alignment3 +write.table(alignment3,row.names=F,col.names=F,quote=F) diff --git a/Schrick-Noah_CS-6643_Lab-9.docx b/Schrick-Noah_CS-6643_Lab-9.docx index 9e5b673..9eea2b5 100644 Binary files a/Schrick-Noah_CS-6643_Lab-9.docx and b/Schrick-Noah_CS-6643_Lab-9.docx differ diff --git a/Schrick-Noah_CS-6643_Lab-9.pdf b/Schrick-Noah_CS-6643_Lab-9.pdf new file mode 100644 index 0000000..1ee6317 Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-9.pdf differ