diff --git a/.Rhistory b/.Rhistory index 8dc0731..0b49493 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,218 +1,3 @@ -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1,2)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -#g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.breaks <- g.hist$breaks # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.breaks[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) -################# Max-Log-Likelihood ################# -n <- length(g.breaks.clean) -kmin <- g.breaks.clean[1] -alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) -alpha.ML -lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) -# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course -# Degree Distribution -# Professor: Dr. McKinney, Spring 2022 -# Noah Schrick - 1492657 -library(igraph) -library(igraphdata) -data(yeast) -g <- yeast -g.netname <- "Yeast" -################# Set up Work ################# -g.vec <- degree(g) -g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, -" Network")) -legend("topright", c("Guess", "Poisson", "Least-Squares Fit", -"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", -"#006CD1", "#E66100", "#D35FB7")) -g.mean <- mean(g.vec) -g.seq <- 0:max(g.vec) # x-axis -################# Guessing Alpha ################# -alpha.guess <- 1.5 -lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) -################# Poisson ################# -g.pois <- dpois(g.seq, g.mean, log=F) -lines(g.seq, g.pois, col="#006CD1", lty=2) -################# Linear model: Least-Squares Fit ################# -g.breaks <- g.hist$breaks[-c(1)] # remove 0 -g.probs <- g.hist$density[-1] # make lengths match -# Need to clean up probabilities that are 0 -nz.probs.mask <- g.probs!=0 -g.breaks.clean <- g.breaks[nz.probs.mask] -g.probs.clean <- g.probs[nz.probs.mask] -#plot(log(g.breaks.clean), log(g.probs.clean)) -g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) -summary(g.fit) -alpha.LM <- coef(g.fit)[2] -lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] @@ -452,20 +237,10 @@ plot(log(g.breaks.clean), log(g.probs.clean)) g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] plot(log(g.breaks.clean), log(g.probs.clean)) -# Lab 7 for the University of Tulsa's CS-6643 Bioinformatics Course -# PDB -# Professor: Dr. McKinney, Fall 2022 -# Noah L. Schrick - 1492657 -## Set Working Directory to file directory - RStudio approach -setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) -#### Part A: Obtaining PDB - no supporting R Code -#### Part B: Visualize the 3D structure -## Install Rpdb and load the pdb -if (!require("Rpdb")) install.packages("Rpdb") -library(Rpdb) -x<-read.pdb("1TGH.pdb") -## Visualize the B and C chains -B_chain_pdb <- subset(x$atoms, x$atoms$chainid=="B") +if (!require("BiocManager")) install.packages("BiocManager") +library(BiocManager) +if (!require("Biostrings")) BiocManager::install("Biostrings") +library(snpStats) # Lab 7 for the University of Tulsa's CS-6643 Bioinformatics Course # PDB # Professor: Dr. McKinney, Fall 2022 @@ -490,7 +265,6 @@ BC_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb))) visualize(BC_chains_pdb,col=color.vec) addResLab(BC_chains_pdb) -rgl.postscript("BC_chains.pdf","pdf",drawText=TRUE) ## Visualize B-C and A Chains A_chain_pdb <- subset(x$atoms, x$atoms$chainid=="A") # remove water @@ -500,7 +274,7 @@ BCA_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | x$atoms$chainid=="C" | x$atoms$chainid=="A") BCA.color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb)),rep("blue",natom(A_chain_pdb))) visualize(BCA_chains_pdb,col=BCA.color.vec) -rgl.postscript("full_complex.pdf","pdf",drawText=TRUE) +#### Part C: Primary structure and DNA Palindromes # get coordinates of C1' atoms of the C-chain DNA molecule C_chain_pdb$resname C_chain_resids<-unique(C_chain_pdb$resid) @@ -508,5 +282,231 @@ C_chain_C1prime <- subset(C_chain_pdb, C_chain_pdb$elename=="C1'") # get chain C DNA sequence C_chain_sequence_messy <- C_chain_C1prime$resname C_chain_sequence <- paste(sapply(C_chain_sequence_messy,function(x) {unlist(strsplit(x,""))[2]}),collapse = "") -C_chain_sequence_messy -C_chain_sequence +if (!require("BiocManager")) install.packages("BiocManager") +library(BiocManager) +if (!require("Biostrings")) BiocManager::install("Biostrings") +library(snpStats) +C_chain_DNAString <- DNAString(C_chain_sequence) +dna.pals <- findPalindromes(C_chain_DNAString, min.armlength=3, +max.looplength=5, max.mismatch = 0) +dna.pals +# Lab 7 for the University of Tulsa's CS-6643 Bioinformatics Course +# PDB +# Professor: Dr. McKinney, Fall 2022 +# Noah L. Schrick - 1492657 +## Set Working Directory to file directory - RStudio approach +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +#### Part A: Obtaining PDB - no supporting R Code +#### Part B: Visualize the 3D structure +## Install Rpdb and load the pdb +if (!require("Rpdb")) install.packages("Rpdb") +library(Rpdb) +x<-read.pdb("1TGH.pdb") +natom(x) +visualize(x,type="l") +## Visualize the B and C chains +B_chain_pdb <- subset(x$atoms, x$atoms$chainid=="B") +C_chain_pdb <- subset(x$atoms, x$atoms$chainid=="C") +# remove water: +C_chain_pdb <- subset(C_chain_pdb,C_chain_pdb$resname!="HOH") +# visualize chains B and C +BC_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | x$atoms$chainid=="C") +color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb))) +visualize(BC_chains_pdb,col=color.vec) +addResLab(BC_chains_pdb) +## Visualize B-C and A Chains +A_chain_pdb <- subset(x$atoms, x$atoms$chainid=="A") +# remove water +A_chain_pdb <- subset(A_chain_pdb, A_chain_pdb$resname!="HOH") +# visualize complex complex +BCA_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | +x$atoms$chainid=="C" | x$atoms$chainid=="A") +BCA.color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb)),rep("blue",natom(A_chain_pdb))) +visualize(BCA_chains_pdb,col=BCA.color.vec) +#### Part C: Primary structure and DNA Palindromes +# get coordinates of C1' atoms of the C-chain DNA molecule +C_chain_pdb$resname +C_chain_resids<-unique(C_chain_pdb$resid) +C_chain_C1prime <- subset(C_chain_pdb, C_chain_pdb$elename=="C1'") +# get chain C DNA sequence +C_chain_sequence_messy <- C_chain_C1prime$resname +C_chain_sequence <- paste(sapply(C_chain_sequence_messy,function(x) {unlist(strsplit(x,""))[2]}),collapse = "") +## Find palindromes +if (!require("BiocManager")) install.packages("BiocManager") +library(BiocManager) +if (!require("Biostrings")) BiocManager::install("Biostrings") +library(snpStats) +C_chain_DNAString <- DNAString(C_chain_sequence) +dna.pals <- findPalindromes(C_chain_DNAString, min.armlength=3, +max.looplength=5, max.mismatch = 0) +visualize(x,type="l") +#### Part D: Find the binding site +## Get size of C chain coords +dim(C_chain_C1prime_coords) +#### Part D: Find the binding site +## Get Coordinates +C_chain_C1prime_coords <- coords(C_chain_C1prime) +dim(C_chain_C1prime_coords) +?coords +rownames(C_chain_C1prime_coords) +colnames(C_chain_C1prime_coords) +C_chain_C1prime_coords[1][1] +C_chain_C1prime +# get coordinates of CA atoms of the A-chain protein molecule +A_chain_sequence_3letter <- A_chain_pdb$resname +A_chain_resids<-unique(A_chain_pdb$resid) +A_chain_CA <- subset(A_chain_pdb, A_chain_pdb$elename=="CA") +A_chain_CA_coords <- coords(A_chain_CA) +dim(A_chain_CA_coords) +outer(1:nrow(chain1), +1:nrow(chain2), +Vectorize(function(i,j) { +dist(rbind(chain1[i,],chain2[j,])) +} +))} +outer(1:nrow(chain1), +1:nrow(chain2), +Vectorize(function(i,j) { +dist(rbind(chain1[i,],chain2[j,])) +}))} +outer(1:nrow(chain1), +1:nrow(chain2), +Vectorize(function(i,j) { +dist(rbind(chain1[i,],chain2[j,])) +}))} +dist(rbind(chain1[i,],chain2[j,]))}))} +outer(1:nrow(chain1), +1:nrow(chain2), Vectorize(function(i,j) {dist(rbind(chain1[i,],chain2[j,]))}))} +# create distance matrix between chains +pair.dist <- function(chain1,chain2){outer(1:nrow(chain1),1:nrow(chain2),Vectorize(function(i,j) {dist(rbind(chain1[i,],chain2[j,]))}))} +prot2DNAdistMat <- pair.dist(A_chain_CA_coords,C_chain_C1prime_coords) +dim(prot2DNAdistMat) +rownames(prot2DNAdistMat) +prot2DNAdistMat[1] +prot2DNAdistMat +vectorize +Vectorize +dim(A_chain_CA_coords) +colnames(A_chain_CA_coords) +rownames(A_chain_CA_coords) +A_chain_CA_coords[1] +A_chain_CA +nrow(A_chain_CA_coords) +# ij location of min in current matrix (2-elt vector) +min_dist <- min(prot2DNAdistMat) +min_dist +min_ij <- which(prot2DNAdistMat == min_dist, arr.ind = TRUE) +min_ij +A_chain_sequence_3letter[min_ij[1]] # closest A-chain residue +strsplit(C_chain_sequence,"")[[1]][min_ij[2]] # closest C-chain residue +?visualize +# color binding residues +CA_chains_pdb <- subset(x$atoms, x$atoms$chainid == "C" | x$atoms$chainid == "A") +CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("blue", natom(A_chain_pdb))) +CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" +CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "purple" +visualize(CA_chains_pdb, col=CA.color.vec) +# color binding residues +CA_chains_pdb <- subset(x$atoms, x$atoms$chainid == "C" | x$atoms$chainid == "A") +CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("blue", natom(A_chain_pdb))) +CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" +CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "red" +visualize(CA_chains_pdb, col=CA.color.vec) +CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("teal", natom(A_chain_pdb))) +CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" +CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "red" +visualize(CA_chains_pdb, col=CA.color.vec) +CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("lightblue", natom(A_chain_pdb))) +CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" +CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "red" +visualize(CA_chains_pdb, col=CA.color.vec) +rgl.postscript("binding_site.pdf", "pdf", drawText=TRUE) +#### Part E: Palindromes in other organisms +## Load associated supportive libraries +if (!require("seqinr")) install.packages("seqinr") +library(seqinr) +## Load in the fasta file as a string +myfasta <- read.fasta(file="sequence.fasta", as.string= TRUE) +myfasta +## Load in the fasta file as a string +myfasta <- read.fasta(file="sequence.fasta", as.string= TRUE)[[1]][1] +myfasta +fasta_DNAString <- DNAString(myfasta) +dna.pals <- findPalindromes(fasta_DNAString, min.armlength=5) +fasta.dna.pals <- findPalindromes(fasta_DNAString, min.armlength=5) +fasta.dna.pals +rc +BiocManager::install("insect") +BiocManager::remove("insect") +BiocManager::uninstall("insect") +BiocManager::delete("insect") +remove.packages("insect") +## Reverse and complement with the "rc" function from insect +fasta.dna.pals.rev <- rev(fasta.dna.pals) +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rev +fasta.dna.pals.rev[1] +fasta.dna.pals.rev[4 +] +fasta.dna.pals.rev[1][4] +fasta.dna.pals.rev[1][1] +fasta.dna.pals.rev$views +class(fasta.dna.pals.rev) +?Biostrings +toString(fasta.dna.pals.rev) +## Reverse and complement with the "rc" function from insect +fasta.dna.pals.rev <- rev(toString(fasta.dna.pals)) +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rev +fasta.dna.pals +toString(fasta.dna.pals) +toString(fasta.dna.pals) +## Reverse and complement with the "rc" function from insect +fasta.dna.pals.rev <- rev(toString(fasta.dna.pals)) +fasta.dna.pals.rev +## Reverse and complement with the "rc" function from insect +rev(strsplit(toString(fasta.dna.pals), split = "")[[1]]) +paste(rev(toString(fasta.dna.pals)),collapse='') +?rev +## Reverse and complement with the "rc" function from insect +paste(rev(strsplit(toString(fasta.dna.pals), split = "")[[1]]), collapse='') +fasta.dna.pals.rev +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dchars] +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] +fasta.dna.pals.rc +fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] +fasta.dna.pals.rc +fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") +fasta.dna.pals.rc +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] +fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] +fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") +fasta.dna.pals.rc +## Reverse and complement +#Convert pal to str, split on each char, rev, then join back as a single str +fasta.dna.pals.rev <- rev(strsplit(toString(fasta.dna.pals), +split = "")[[1]]) +fasta.dna.pals.rev +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] +fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] +fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") +fasta.dna.pals.rev +fasta.dna.pals.rc +# From the rc function in the insect package. Modified for these variables +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] +fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] +fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") +fasta.dna.pals.rc +toString(fasta.dna.pals) +fasta.dna.pals.rev +fasta.dna.pals.rc +fasta.dna.pals == fasta.dna.pals.rc +toString(fasta.dna.pals) == fasta.dna.pals.rc diff --git a/.~lock.pdb_lab.docx# b/.~lock.pdb_lab.docx# deleted file mode 100644 index 184ce00..0000000 --- a/.~lock.pdb_lab.docx# +++ /dev/null @@ -1 +0,0 @@ -,noah,NovaArchSys,27.10.2022 18:54,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/Schrick-Noah_CS-6643_Lab-7_Report.pdf b/Schrick-Noah_CS-6643_Lab-7_Report.pdf new file mode 100644 index 0000000..972569b Binary files /dev/null and b/Schrick-Noah_CS-6643_Lab-7_Report.pdf differ diff --git a/Schrick-Noah_CS-6643_Lab7.R b/Schrick-Noah_CS-6643_Lab7.R index 925a834..6e4451b 100644 --- a/Schrick-Noah_CS-6643_Lab7.R +++ b/Schrick-Noah_CS-6643_Lab7.R @@ -93,4 +93,29 @@ CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("lightblue", natom(A_cha CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "red" visualize(CA_chains_pdb, col=CA.color.vec) -rgl.postscript("binding_site.pdf", "pdf", drawText=TRUE) \ No newline at end of file +rgl.postscript("binding_site.pdf", "pdf", drawText=TRUE) + +#### Part E: Palindromes in other organisms +## Load associated supportive libraries +if (!require("seqinr")) install.packages("seqinr") +library(seqinr) + +## Load in the fasta file as a string +myfasta <- read.fasta(file="sequence.fasta", as.string= TRUE)[[1]][1] +fasta_DNAString <- DNAString(myfasta) +fasta.dna.pals <- findPalindromes(fasta_DNAString, min.armlength=5) +toString(fasta.dna.pals) + +## Reverse and complement +# Convert pal to str, split on each char, rev. Leave broken up for %in% +fasta.dna.pals.rev <- rev(strsplit(toString(fasta.dna.pals), + split = "")[[1]]) +fasta.dna.pals.rev +# From the rc function in the insect package. Modified for these variables +dnachars <- strsplit("ACGT", split = "")[[1]] +comps <- strsplit("TGCA", split = "")[[1]] +fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] +fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] +fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") +fasta.dna.pals.rc +toString(fasta.dna.pals) == fasta.dna.pals.rc diff --git a/pdb_lab.docx b/pdb_lab.docx index faf4869..dfac8f8 100644 Binary files a/pdb_lab.docx and b/pdb_lab.docx differ