66 lines
1.7 KiB
R
66 lines
1.7 KiB
R
# Lab 1 for the University of Tulsa's CS-6643 Bioinformatics Course
|
|
# Intro to R, online bioinformatics resources, nucleotide frequency statistics
|
|
# Professor: Dr. McKinney, Fall 2022
|
|
# Noah L. Schrick - 1492657
|
|
|
|
#### Part A: Seq Function
|
|
## a
|
|
# Odd numbers from 1 up to 33, stepping by 2.
AAvec <- seq(1, 33, by = 2)
|
|
|
|
## b
|
|
# Fifteen evenly spaced values spanning 7 to 40, endpoints included.
ABvec <- seq(7, 40, length.out = 15)
|
|
|
|
## c
|
|
# Draw 20 nucleotides at random from A, C, G, T, sampling with replacement.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
my.dna <- sample(c("A", "C", "G", "T"), size = 20, replace = TRUE)
|
|
|
|
## d
|
|
# Number of "A" entries in my.dna, obtained by summing the logical match
# vector (NA comparisons are skipped, matching which()-based counting).
my.dna.A <- sum(my.dna == "A", na.rm = TRUE)
|
|
|
|
## e
|
|
# Tabulate how many times each nucleotide occurs in my.dna.
my.dna.table <- table(my.dna)

# Pie chart of the counts: one slice per tabulated nucleotide, labelled
# "<letter> (<percent>%)".
my.dna.table.df <- as.data.frame(my.dna.table)
# One rainbow colour per table entry.
cols <- rainbow(nrow(my.dna.table))
# Share of each nucleotide as a percentage, rounded to one decimal place.
my.dna.table.df$percent <-
  round(100 * my.dna.table.df$Freq / sum(my.dna.table.df$Freq), digits = 1)
# paste0() concatenates without a separator (replaces paste(..., sep = "")).
my.dna.table.df$label <- paste0(
  my.dna.table.df$my.dna, " (", my.dna.table.df$percent, "%)"
)
pie(my.dna.table.df$Freq,
  labels = my.dna.table.df$label, col = cols,
  main = "Pie Chart Representation of Random ACTG Sample"
)
|
|
|
|
|
|
# Bar plot of the nucleotide counts, one bar per table entry. The y-axis
# starts at -1 to leave room below the baseline for the letter labels and
# extends 2 above the tallest bar for the count labels. `bp` stores the value
# returned by barplot(), which is used as the x positions for the labels.
bp <- barplot(as.matrix(my.dna.table),
  beside = TRUE, xlab = "Letter", ylab = "Frequency",
  ylim = c(-1, max(as.numeric(my.dna.table)) + 2),
  main = "Bar Plot Representation of Random ACTG Sample", col = cols,
  # Full argument name: `legend = TRUE` only worked through partial matching
  # of `legend.text`.
  legend.text = TRUE
)

# Counts drawn 0.5 above each bar top; letters drawn at y = -0.5, below the
# baseline.
text(
  x = bp, y = as.numeric(my.dna.table) + 0.5,
  labels = as.numeric(my.dna.table)
)
text(x = bp, y = -0.5, labels = names(my.dna.table))
|
|
|
|
## f
|
|
# Draw 20 nucleotides with replacement, with C and G (0.4 each) four times as
# likely as A and T (0.1 each). `TRUE` is spelled out because `T` is a
# reassignable variable.
my.dna2 <- sample(c("A", "C", "G", "T"),
  size = 20, replace = TRUE,
  prob = c(0.1, 0.4, 0.4, 0.1)
)
# Tabulate over all four letters so that a nucleotide which was never drawn
# (quite possible for A and T at probability 0.1) is reported with a count of
# 0 instead of being dropped. The argument name keeps "my.dna2" as the
# table's dimnames name.
my.dna2.table <- table(
  my.dna2 = factor(my.dna2, levels = c("A", "C", "G", "T"))
)
my.dna2.table
|
|
|
|
|
|
#### Part B: NCBI (no supporting R code for this part)
|
|
|
|
#### Part C: Reading FASTA files, nucleotide and dinucleotide frequencies
|
|
|
|
## Precursor: Load the supporting libraries
|
|
|
|
## 1
|
|
|
|
## 2
|
|
|
|
## 3
|
|
|
|
#### Part D: GC Content
|
|
|
|
## 1
|
|
|
|
#### Part E: Coronavirus
|
|
|
|
## 1
|