#### Part D: Convert to functions

#' Build the global-alignment score (F) and traceback (T) matrices
#'
#' Fills the dynamic-programming tables for a global alignment of two DNA
#' strings using a match/mismatch substitution score and a linear gap cost.
#' Rows correspond to the letters of `x_str` (side sequence) and columns to
#' the letters of `y_str` (top sequence); both tables carry one extra leading
#' row and column labelled "-".
#'
#' @param x_str Side sequence, a string over the letters A, C, G, T.
#' @param y_str Top sequence, a string over the letters A, C, G, T.
#' @param match_score Score added when two aligned letters are identical.
#' @param mismatch_score Score added when two aligned letters differ.
#' @param gap_penalty Cost of aligning a letter with a gap. Only the magnitude
#'   is used: `2` and `-2` both subtract 2 per gap.
#' @return A list with
#'   `Fmat` (score matrix),
#'   `Tmat` (traceback codes: 1 = match/mismatch, 2 = gap in the upper
#'   sequence, 3 = gap in the side sequence; ties go to the lowest code), and
#'   `score_out` (the bottom-right entry of `Fmat`).
make.alignment.matrices <- function(x_str, y_str, match_score, mismatch_score,
                                    gap_penalty) {
  dna_letters <- c("A", "C", "G", "T")

  # The border initialisation always subtracted |gap_penalty|; use the same
  # sign convention inside the recursion so a positive penalty cannot turn
  # into a reward for opening gaps.
  gap_cost <- -abs(gap_penalty)

  ## Substitution matrix: mismatch everywhere, match on the diagonal
  S <- matrix(mismatch_score,
              nrow = length(dna_letters), ncol = length(dna_letters),
              dimnames = list(dna_letters, dna_letters))
  diag(S) <- match_score

  ## Split the sequences into single letters
  x <- unlist(strsplit(x_str, ""))
  y <- unlist(strsplit(y_str, ""))
  x_len <- length(x)
  y_len <- length(y)

  # Unknown letters would otherwise be looked up as NA and poison every score.
  unknown <- setdiff(c(x, y), dna_letters)
  if (length(unknown) > 0) {
    stop("Sequences may only contain A, C, G and T; found: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }

  # Score of aligning x[i] with y[j], looked up once instead of per cell.
  pair_scores <- S[x, y, drop = FALSE]

  ## F matrix and T matrix
  Fmat <- matrix(0, nrow = x_len + 1, ncol = y_len + 1,
                 dimnames = list(c("-", x), c("-", y)))
  Tmat <- Fmat # 0's to start

  # First row and column: nothing but gaps
  Fmat[, 1] <- (0:x_len) * gap_cost
  Fmat[1, ] <- (0:y_len) * gap_cost
  Tmat[, 1] <- 2 # 2 means align with a gap in the upper seq
  Tmat[1, ] <- 3 # 3 means align with a gap in the side seq

  ## Fill the interior; seq_len() makes an empty sequence skip the loop
  for (i in seq_len(x_len)) {
    for (j in seq_len(y_len)) {
      # Cell (i + 1, j + 1) of Fmat pairs x[i] with y[j].
      candidates <- c(Fmat[i, j] + pair_scores[i, j], # 1 mis/match
                      Fmat[i, j + 1] + gap_cost,      # 2 up-gap
                      Fmat[i + 1, j] + gap_cost)      # 3 left-gap
      Fmat[i + 1, j + 1] <- max(candidates)
      Tmat[i + 1, j + 1] <- which.max(candidates)
    }
  }

  list(Fmat = Fmat, Tmat = Tmat, score_out = Fmat[x_len + 1, y_len + 1])
}

# load new input
x_str2 <- "GATTA" # side sequence
y_str2 <- "GAATTC" # top sequence
match_score <- 2
mismatch_score <- -1
gap_penalty <- -2
# Build the alignment matrices for the new input and print each component.
align.list2 <- make.alignment.matrices(x_str2, y_str2, match_score,
                                       mismatch_score, gap_penalty)
align.list2$Fmat
align.list2$Tmat
align.list2$score_out

# Install gplots only when it is missing; requireNamespace() checks for the
# package without attaching it, and library() then fails loudly if the
# install did not succeed.
if (!requireNamespace("gplots", quietly = TRUE)) install.packages("gplots")
library(gplots)

Fmat2 <- align.list2$Fmat
col <- c("black", "blue", "red", "yellow", "green")
# One more break than colours, spanning the full range of the score matrix.
breaks <- seq(min(Fmat2), max(Fmat2), length.out = length(col) + 1)

# The heatmap shows the scores without the leading gap-only row and column,
# so the cell separators are sized from that sub-matrix.
Fmat2_body <- Fmat2[-1, -1, drop = FALSE]

heatmap.2(Fmat2_body, dendrogram = "none", density.info = "none",
          Rowv = FALSE, Colv = FALSE, trace = "none",
          breaks = breaks, col = col,
          sepwidth = c(0.01, 0.01),
          sepcolor = "black",
          colsep = seq_len(ncol(Fmat2_body)),
          rowsep = seq_len(nrow(Fmat2_body)))