Finalizing EMBOSS pairwise alignment with influenza
This commit is contained in:
parent
d37c6cf3f9
commit
15c29fa1ef
31
CY030230.1.fasta
Normal file
31
CY030230.1.fasta
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
>ENA|CY030230|CY030230.1 Influenza A virus (A/Brisbane/59/2007(H1N1)) segment 4 sequence.
|
||||||
|
AGCAAAAGCAGGGGATAATAAAAACAACCAGAATGAAAGTAAAACTACTGGTCCTGTTAT
|
||||||
|
GCACATTTACAGCTACATATGCAGACACAATATGTATAGGCTACCATGCTAACAACTCGA
|
||||||
|
CCGACACTGTTGACACAGTACTTGAAAAGAATGTGACAGTGACACACTCTGTCAACCTGC
|
||||||
|
TTGAGAACAGTCACAATGGAAAACTATGTCTATTAAAAGGAATAGCCCCACTACAATTGG
|
||||||
|
GTAATTGCAGCGTTGCCGGGTGGATCTTAGGAAACCCAGAATGCGAATTACTGATTTCCA
|
||||||
|
AGGAGTCATGGTCCTACATTGTAGAAAAACCAAATCCTGAGAATGGAACATGTTACCCAG
|
||||||
|
GGCATTTCGCTGACTATGAGGAACTGAGGGAGCAATTGAGTTCAGTATCTTCATTTGAGA
|
||||||
|
GGTTCGAAATATTCCCCAAAGAAAGCTCATGGCCCAACCACACCGTAACCGGAGTGTCAG
|
||||||
|
CATCATGCTCCCATAATGGGGAAAGCAGTTTTTACAGAAATTTGCTATGGCTGACGGGGA
|
||||||
|
AGAATGGTTTGTACCCAAACCTGAGCAAGTCCTATGCAAACAACAAAGAAAAAGAAGTCC
|
||||||
|
TTGTACTATGGGGTGTTCATCACCCGCCAAACATAGGTGACCAAAAGGCCCTCTATCATA
|
||||||
|
CAGAAAATGCTTATGTCTCTGTAGTGTCTTCACATTATAGCAGAAAATTCACCCCAGAAA
|
||||||
|
TAGCCAAAAGACCCAAAGTAAGAGATCAAGAAGGAAGAATCAATTACTACTGGACTCTGC
|
||||||
|
TTGAACCCGGGGATACAATAATATTTGAGGCAAATGGAAATCTAATAGCGCCAAGATATG
|
||||||
|
CTTTCGCACTGAGTAGAGGCTTTGGATCAGGAATCATCAACTCAAATGCACCAATGGATA
|
||||||
|
AATGTGATGCGAAGTGCCAAACACCTCAGGGAGCTATAAACAGCAGTCTTCCTTTCCAGA
|
||||||
|
ACGTACACCCAGTCACAATAGGAGAGTGTCCAAAGTATGTCAGGAGTGCAAAATTAAGGA
|
||||||
|
TGGTTACAGGACTAAGGAACATCCCATCCATTCAATCCAGAGGTTTGTTTGGAGCCATTG
|
||||||
|
CCGGTTTCATTGAAGGGGGGTGGACTGGAATGGTAGATGGTTGGTATGGTTATCATCATC
|
||||||
|
AGAATGAGCAAGGATCTGGCTATGCTGCAGATCAAAAAAGCACACAAAATGCCATTAATG
|
||||||
|
GGATTACAAACAAGGTGAATTCTGTAATTGAGAAAATGAACACTCAATTCACAGCAGTGG
|
||||||
|
GCAAAGAATTCAACAAATTGGAAAGAAGGATGGAAAACTTGAATAAAAAAGTTGATGATG
|
||||||
|
GGTTTATAGACATTTGGACATATAATGCAGAACTGTTGGTTCTACTGGAAAATGAAAGGA
|
||||||
|
CTTTGGATTTCCATGACTCCAATGTGAAGAATCTGTATGAGAAAGTAAAAAGCCAGTTAA
|
||||||
|
AGAATAATGCTAAAGAAATAGGAAATGGGTGTTTTGAATTCTATCACAAGTGTAACGATG
|
||||||
|
AATGCATGGAGAGTGTAAAGAATGGAACTTATGACTATCCAAAATATTCCGAAGAATCAA
|
||||||
|
AGTTAAACAGGGAGAAAATTGATGGAGTGAAATTGGAATCAATGGGAGTCTATCAGATTC
|
||||||
|
TGGCGATCTACTCAACAGTCGCCAGTTCTCTGGTTCTTTTGGTCTCCCTGGGGGCAATCA
|
||||||
|
GCTTCTGGATGTGTTCCAATGGGTCTTTACAGTGTAGAATATGCATCTAAGACCAGAATT
|
||||||
|
TCAGAAATATAAGGAAAAACA
|
||||||
30
FJ969540.1.fasta
Normal file
30
FJ969540.1.fasta
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
>ENA|FJ969540|FJ969540.1 Influenza A virus (A/California/07/2009(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds.
|
||||||
|
ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTA
|
||||||
|
TGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAAT
|
||||||
|
GTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAA
|
||||||
|
CTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGA
|
||||||
|
AATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCT
|
||||||
|
AGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAG
|
||||||
|
CAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGG
|
||||||
|
CCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGC
|
||||||
|
TTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAA
|
||||||
|
TCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCT
|
||||||
|
ACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCA
|
||||||
|
TCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGRTCRA
|
||||||
|
GAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAA
|
||||||
|
GCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCT
|
||||||
|
GGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAG
|
||||||
|
GGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGT
|
||||||
|
CCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCT
|
||||||
|
ATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGG
|
||||||
|
ATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCC
|
||||||
|
GACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATT
|
||||||
|
GAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGA
|
||||||
|
ATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCC
|
||||||
|
GAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAG
|
||||||
|
AACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGC
|
||||||
|
TGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACT
|
||||||
|
TATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTA
|
||||||
|
AAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCA
|
||||||
|
TTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTA
|
||||||
|
CAGTGTAGAATATGTATTTAA
|
||||||
@ -0,0 +1,51 @@
|
|||||||
|
# Lab 8 for the University of Tulsa's CS-6643 Bioinformatics Course
|
||||||
|
# Pairwise Sequence Alignment
|
||||||
|
# Professor: Dr. McKinney, Fall 2022
|
||||||
|
# Noah L. Schrick - 1492657
|
||||||
|
|
||||||
|
## Set working directory to this file's directory - RStudio approach.
## rstudioapi only works inside a running RStudio session, so the call is
## guarded: when the script is run some other way (e.g. Rscript) the current
## working directory is kept and the FASTA files are looked up relative to it.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script.path <- rstudioapi::getActiveDocumentContext()$path
  # An empty path (e.g. a document that was never saved) would make setwd()
  # fail, so only change directory when a real path is reported.
  if (length(script.path) == 1L && nzchar(script.path)) {
    setwd(dirname(script.path))
  }
}

#### Part A: EMBOSS pairwise alignment server and influenza
## Load associated supportive libraries
# requireNamespace() only checks that the package is installed; library()
# then attaches it and stops with an error if the installation failed.
if (!requireNamespace("seqinr", quietly = TRUE)) install.packages("seqinr")
library(seqinr)
|
||||||
|
|
||||||
|
## Load in the fasta file
|
||||||
|
#' Read the first record of a FASTA file as a vector of single characters.
#'
#' @param fasta.file Path of the FASTA file, passed on to
#'   `seqinr::read.fasta()`.
#' @return A character vector holding one sequence character per element,
#'   taken from the first record of the file. Any further records are ignored.
fasta2vec <- function(fasta.file){
  # Install seqinr on demand (as before), but call it through its namespace so
  # the function does not re-attach the package on every call.
  if (!requireNamespace("seqinr", quietly = TRUE)) install.packages("seqinr")

  fasta <- seqinr::read.fasta(file = fasta.file, as.string = TRUE)
  if (length(fasta) == 0L) {
    stop("No FASTA records found in: ", fasta.file, call. = FALSE)
  }

  # as.string = TRUE yields one string per record; keep the first record only.
  fasta.string <- fasta[[1]][1]

  # Split into single characters. The value is the last expression (not an
  # assignment), so it is returned visibly and prints at the console.
  unlist(strsplit(fasta.string, ""))
}
|
||||||
|
|
||||||
|
# Read both H1N1 hemagglutinin sequences as vectors of single nucleotides.
h1n1.Cali.dna.vec <- fasta2vec("FJ969540.1.fasta")
h1n1.Bris.dna.vec <- fasta2vec("CY030230.1.fasta")

## Translate the California DNA into amino acids and count each residue
h1n1.Cali.aa.vec <- seqinr::translate(h1n1.Cali.dna.vec)
h1n1.Cali.aa.table <- table(h1n1.Cali.aa.vec)

## Dotchart of amino acid frequencies
# Order the counts from least to most frequent.
h1n1.Cali.aa.sortedtable <- sort(h1n1.Cali.aa.table)
# Relabel the one-letter codes with three-letter codes. aaa() warns about
# letters it cannot convert; either ignore the warning or drop those letters
# from the table beforehand.
names(h1n1.Cali.aa.sortedtable) <- seqinr::aaa(names(h1n1.Cali.aa.sortedtable))
dotchart(h1n1.Cali.aa.sortedtable)
|
||||||
|
|
||||||
|
# Repeat for Brisbane, accounting for the shift in the start codon.
# This record begins with bases that precede the coding region, so the reading
# frame must be anchored on the start codon in the DNA itself. Translating in
# frame 0 and trimming to the first 'M' afterwards reads the wrong frame
# whenever the ATG does not sit at a multiple of three (here it starts at
# nucleotide 33). NOTE(review): the first ATG is assumed to be the true start.
h1n1.Bris.start <- as.integer(regexpr("atg",
                                      paste(h1n1.Bris.dna.vec, collapse = ""),
                                      ignore.case = TRUE))
if (h1n1.Bris.start < 1L) {
  stop("No ATG start codon found in the Brisbane sequence", call. = FALSE)
}
h1n1.Bris.cds.vec <-
  h1n1.Bris.dna.vec[h1n1.Bris.start:length(h1n1.Bris.dna.vec)]
h1n1.Bris.aa.vec <- seqinr::translate(h1n1.Bris.cds.vec)

# Keep the protein up to and including its first stop ('*'), mirroring the
# California record (a complete cds ending on its stop codon); residues
# translated from the bases after the stop would distort the counts.
h1n1.Bris.stop <- match("*", h1n1.Bris.aa.vec)
if (!is.na(h1n1.Bris.stop)) {
  h1n1.Bris.aa.vec <- h1n1.Bris.aa.vec[seq_len(h1n1.Bris.stop)]
}

h1n1.Bris.aa.table <- table(h1n1.Bris.aa.vec) # aa count table
h1n1.Bris.aa.sortedtable <- sort(h1n1.Bris.aa.table)

# Three-letter labels for the chart (aaa() warns on letters it cannot convert).
names(h1n1.Bris.aa.sortedtable) <- aaa(names(h1n1.Bris.aa.sortedtable))
dotchart(h1n1.Bris.aa.sortedtable)

# Protein as one string, e.g. for pasting into the EMBOSS alignment server.
paste(h1n1.Bris.aa.vec, collapse = "")
|
||||||
|
#
|
||||||
|
|
||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user