79 lines
2.4 KiB
R
79 lines
2.4 KiB
R
# Lab 8 for the University of Tulsa's CS-6643 Bioinformatics Course
|
|
# Pairwise Sequence Alignment
|
|
# Professor: Dr. McKinney, Fall 2022
|
|
# Noah L. Schrick - 1492657
|
|
|
|
## Set Working Directory to file directory - RStudio approach
|
|
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
|
|
|
#### Part A: EMBOSS pairwise alignment server and influenza
|
|
## Load associated supportive libraries
|
|
if (!require("seqinr")) install.packages("seqinr")
|
|
library(seqinr)
|
|
|
|
## Load in the fasta file
|
|
fasta2vec <- function(fasta.file){
|
|
if (!require("seqinr")) install.packages("seqinr")
|
|
library(seqinr)
|
|
fasta <- read.fasta(file=fasta.file, as.string= TRUE)
|
|
fasta.string <- fasta[[1]][1]
|
|
fasta.list <- strsplit(fasta.string,"")
|
|
fasta.vec <- unlist(fasta.list)
|
|
}
|
|
|
|
h1n1.Cali.dna.vec <- fasta2vec("FJ969540.1.fasta")
|
|
h1n1.Bris.dna.vec <- fasta2vec("CY030230.1.fasta")
|
|
|
|
## Convert DNA seq into amino acid seq
|
|
h1n1.Cali.aa.vec<-seqinr::translate(h1n1.Cali.dna.vec)
|
|
h1n1.Cali.aa.table<-table(h1n1.Cali.aa.vec) # aa count table
|
|
|
|
## Create dotchart of amino acid freqs
|
|
# sort the table (smallest to largest)
|
|
h1n1.Cali.aa.sortedtable <-h1n1.Cali.aa.table[order(h1n1.Cali.aa.table)]
|
|
# convert the AA table names to 3-letter code
|
|
# You can ignore the warning or remove the offending letters from
|
|
# the table
|
|
names(h1n1.Cali.aa.sortedtable)<-aaa(names(h1n1.Cali.aa.sortedtable))
|
|
dotchart(h1n1.Cali.aa.sortedtable)
|
|
|
|
# Repeating for Brisbane and accounting for shift in start codon
|
|
h1n1.Bris.aa.vec <- seqinr::translate(h1n1.Bris.dna.vec)
|
|
h1n1.Bris.aa.vec <- h1n1.Bris.aa.vec[-seq(1, match('M', h1n1.Bris.aa.vec)-1)]
|
|
|
|
h1n1.Bris.aa.table<-table(h1n1.Bris.aa.vec) # aa count table
|
|
h1n1.Bris.aa.sortedtable <-h1n1.Bris.aa.table[order(h1n1.Bris.aa.table)]
|
|
|
|
names(h1n1.Bris.aa.sortedtable)<-aaa(names(h1n1.Bris.aa.sortedtable))
|
|
dotchart(h1n1.Bris.aa.sortedtable)
|
|
paste(h1n1.Bris.aa.vec,collapse="",sep="")
|
|
|
|
|
|
#### Part B: Loop Recursion Warmup
|
|
calc.num.paths <- function(n,m){
|
|
path_matrix <- matrix(1, nrow=m+1, ncol=n+1)
|
|
for (i in seq(2,m+1)){
|
|
for (j in seq(2, n+1)){
|
|
path_matrix[i,j] <- path_matrix[i-1,j] + path_matrix[i,j-1]
|
|
}
|
|
}
|
|
path_matrix[m+1,n+1]
|
|
}
|
|
|
|
m <- 5 # row edges
|
|
n <- 5 # col edges
|
|
calc.num.paths(n,m)
|
|
factorial(n+m)/(factorial(n)*factorial(m))
|
|
|
|
m <- 5 # row edges
|
|
n <- 6 # col edges
|
|
calc.num.paths(n,m)
|
|
factorial(n+m)/(factorial(n)*factorial(m))
|
|
|
|
m <- 10 # row edges
|
|
n <- 10 # col edges
|
|
calc.num.paths(n,m)
|
|
factorial(n+m)/(factorial(n)*factorial(m))
|
|
|
|
|