diff --git a/CY030230.1.fasta b/CY030230.1.fasta new file mode 100644 index 0000000..ae23984 --- /dev/null +++ b/CY030230.1.fasta @@ -0,0 +1,31 @@ +>ENA|CY030230|CY030230.1 Influenza A virus (A/Brisbane/59/2007(H1N1)) segment 4 sequence. +AGCAAAAGCAGGGGATAATAAAAACAACCAGAATGAAAGTAAAACTACTGGTCCTGTTAT +GCACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCTAACAACTCGA +CCGACACTGTTGACACAGTACTTGAAAAGAATGTGACAGTGACACACTCTGTCAACCTGC +TTGAGAACAGTCACAATGGAAAACTATGTCTATTAAAAGGAATAGCCCCACTACAATTGG +GTAATTGCAGCGTTGCCGGGTGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCA +AGGAGTCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAG +GGCATTTCGCTGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGA +GGTTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTGTCAG +CATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGA +AGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAAAAAGAAGTCC +TTGTACTATGGGGTGTTCATCACCCGCCAAACATAGGTGACCAAAAGGCCCTCTATCATA +CAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAA +TAGCCAAAAGACCCAAAGTAAGAGATCAAGAAGGAAGAATCAATTACTACTGGACTCTGC +TTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATG +CTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATA +AATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGA +ACGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGA +TGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTG +CCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATC +AGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATG +GGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCAGTGG +GCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTGAATAAAAAAGTTGATGATG +GGTTTATAGACATTTGGACATATAATGCAGAACTGTTGGTTCTACTGGAAAATGAAAGGA +CTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAGTTAA +AGAATAATGCTAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATG +AATGCATGGAGAGTGTAAAGAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAA +AGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTC +TGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTTCTTTTGGTCTCCCTGGGGGCAATCA 
+GCTTCTGGATGTGTTCCAATGGGTCTTTACAGTGTAGAATATGCATCTAAGACCAGAATT +TCAGAAATATAAGGAAAAACA diff --git a/FJ969540.1.fasta b/FJ969540.1.fasta new file mode 100644 index 0000000..d558970 --- /dev/null +++ b/FJ969540.1.fasta @@ -0,0 +1,30 @@ +>ENA|FJ969540|FJ969540.1 Influenza A virus (A/California/07/2009(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds. +ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTA +TGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAAT +GTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAA +CTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGA +AATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCT +AGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAG +CAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGG +CCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGC +TTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAA +TCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCT +ACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCA +TCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGRTCRA +GAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAA +GCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCT +GGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAG +GGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGT +CCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCT +ATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGG +ATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCC +GACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATT +GAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGA +ATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCC +GAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAG +AACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGC +TGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACT +TATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTA 
# Lab 8 for the University of Tulsa's CS-6643 Bioinformatics Course
# Pairwise Sequence Alignment
# Professor: Dr. McKinney, Fall 2022
# Noah L. Schrick - 1492657

## Set Working Directory to file directory - RStudio approach
# Only attempted when the script is running inside RStudio and the document
# has been saved; otherwise the current working directory is left untouched
# instead of aborting the whole script.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script.path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script.path)) {
    setwd(dirname(script.path))
  }
}

#### Part A: EMBOSS pairwise alignment server and influenza
## Load associated supportive libraries
# requireNamespace() only tests for the package; library() then attaches it
# and fails loudly if the installation did not succeed.
if (!requireNamespace("seqinr", quietly = TRUE)) install.packages("seqinr")
library(seqinr)

## Load in the fasta file
#' Read the first record of a FASTA file as a vector of single characters.
#'
#' @param fasta.file Path to a FASTA file.
#' @return Character vector with one element per character of the first
#'   sequence in the file.
fasta2vec <- function(fasta.file) {
  if (!file.exists(fasta.file)) {
    stop("FASTA file not found: ", fasta.file, call. = FALSE)
  }
  fasta <- seqinr::read.fasta(file = fasta.file, as.string = TRUE)
  if (length(fasta) == 0L) {
    stop("no sequences found in: ", fasta.file, call. = FALSE)
  }
  fasta.string <- fasta[[1]][1]
  # Return the value visibly rather than ending on an assignment.
  unlist(strsplit(fasta.string, ""))
}

#' Translate a DNA sequence starting at its first ATG codon.
#'
#' The reading frame is fixed by the position of the first "atg" in the
#' nucleotide sequence, so any 5' untranslated bases are skipped before
#' translating. Translating from base 1 and trimming the protein to its first
#' "M" afterwards is only correct when the number of leading bases happens to
#' be a multiple of three.
#'
#' @param dna.vec Character vector of single nucleotides.
#' @return Character vector of one-letter amino acids running from the start
#'   codon through the first in-frame stop ("*", kept), or to the end of the
#'   sequence when no stop codon is reached.
dna2protein <- function(dna.vec) {
  dna.vec <- tolower(dna.vec)
  # One character per element, so a string offset equals a vector index.
  start.pos <- regexpr("atg", paste(dna.vec, collapse = ""), fixed = TRUE)[[1]]
  if (start.pos < 1) {
    stop("no ATG start codon found in sequence", call. = FALSE)
  }
  aa.vec <- seqinr::translate(dna.vec[start.pos:length(dna.vec)])
  # Drop anything translated from the 3' untranslated region.
  stop.pos <- match("*", aa.vec)
  if (!is.na(stop.pos)) {
    aa.vec <- aa.vec[seq_len(stop.pos)]
  }
  aa.vec
}

#' Sort an amino-acid count table and relabel it with 3-letter codes.
#'
#' @param aa.table Table of one-letter amino-acid counts.
#' @return The table ordered from smallest to largest count, with names
#'   converted by seqinr::aaa().
sortAaTable <- function(aa.table) {
  sorted.table <- aa.table[order(aa.table)]
  # aaa() warns and yields NA for letters it does not know (e.g. the "X"
  # produced by codons containing ambiguous bases). You can ignore the
  # warning or remove the offending letters from the table.
  names(sorted.table) <- seqinr::aaa(names(sorted.table))
  sorted.table
}

h1n1.Cali.dna.vec <- fasta2vec("FJ969540.1.fasta")
h1n1.Bris.dna.vec <- fasta2vec("CY030230.1.fasta")

## Convert DNA seq into amino acid seq
# The California record is a complete cds that begins with ATG, so its
# translation starts at the first base.
h1n1.Cali.aa.vec <- dna2protein(h1n1.Cali.dna.vec)
h1n1.Cali.aa.table <- table(h1n1.Cali.aa.vec) # aa count table

## Create dotchart of amino acid freqs
h1n1.Cali.aa.sortedtable <- sortAaTable(h1n1.Cali.aa.table)
dotchart(h1n1.Cali.aa.sortedtable)

# Repeating for Brisbane and accounting for shift in start codon
# The Brisbane record is the full segment: its first ATG sits at base 33,
# which is not in frame with base 1, so the frame must be set on the DNA.
h1n1.Bris.aa.vec <- dna2protein(h1n1.Bris.dna.vec)

h1n1.Bris.aa.table <- table(h1n1.Bris.aa.vec) # aa count table
h1n1.Bris.aa.sortedtable <- sortAaTable(h1n1.Bris.aa.table)
dotchart(h1n1.Bris.aa.sortedtable)

# Protein as a single string, e.g. for pasting into the alignment server.
print(paste(h1n1.Bris.aa.vec, collapse = ""))