################# Max-Log-Likelihood #################
# Tail of an earlier run: g.breaks.clean, g.seq and alpha.LM are not assigned
# anywhere above this point in the file, so these statements rely on objects
# left over from a previous session.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
# Each break is scaled by kmin inside the log, matching the later runs below.
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4)
alpha.LM

# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
# Degree Distribution
# Professor: Dr. McKinney, Spring 2022
# Noah Schrick - 1492657

library(igraph)
library(igraphdata)

data(yeast)
g <- yeast
g.netname <- "Yeast"

################# Set up Work #################
g.vec <- degree(g)
g.hist <- hist(g.vec, freq = FALSE,
               main = paste("Histogram of the", g.netname, " Network"))
legend("topright",
       c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
       lty = c(1, 2, 3, 4),
       col = c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))

g.mean <- mean(g.vec)
g.seq <- 0:max(g.vec)  # x-axis

################# Guessing Alpha #################
alpha.guess <- 1.5
lines(g.seq, g.seq^(-alpha.guess), col = "#40B0A6", lty = 1)

################# Poisson #################
g.pois <- dpois(g.seq, g.mean, log = FALSE)
lines(g.seq, g.pois, col = "#006CD1", lty = 2)

################# Linear model: Least-Squares Fit #################
# hist() returns one more break than it has bins. Dropping the first break
# (the 0, whose log is -Inf) and the last one leaves exactly one break per
# remaining density value, so the mask below indexes both vectors alike.
g.breaks <- g.hist$breaks[-c(1, length(g.hist$breaks))]
g.probs <- g.hist$density[-1]
# Need to clean up probabilities that are 0
nz.probs.mask <- g.probs != 0
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
#plot(log(g.breaks.clean), log(g.probs.clean))
g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
summary(g.fit)
# The fitted line is log(p) = intercept + slope * log(k); the curve drawn
# below is k^(-alpha), so alpha is the negated slope.
alpha.LM <- -coef(g.fit)[2]
lines(g.seq, g.seq^(-alpha.LM), col = "#E66100", lty = 3)

################# Max-Log-Likelihood #################
# NOTE(review): this estimator is evaluated on the retained bin edges, not on
# the individual degrees in g.vec - confirm that is what is intended.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4)

# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
# Degree Distribution
# Professor: Dr.
# McKinney, Spring 2022
# Noah Schrick - 1492657

library(igraph)
library(igraphdata)

data(yeast)
g <- yeast
g.netname <- "Yeast"

################# Set up Work #################
g.vec <- degree(g)
g.hist <- hist(g.vec, freq = FALSE,
               main = paste("Histogram of the", g.netname, " Network"))
legend("topright",
       c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
       lty = c(1, 2, 3, 4),
       col = c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))

g.mean <- mean(g.vec)
g.seq <- 0:max(g.vec)  # x-axis

################# Guessing Alpha #################
alpha.guess <- 1.5
lines(g.seq, g.seq^(-alpha.guess), col = "#40B0A6", lty = 1, lwd = 5)

################# Poisson #################
g.pois <- dpois(g.seq, g.mean, log = FALSE)
lines(g.seq, g.pois, col = "#006CD1", lty = 2)

################# Linear model: Least-Squares Fit #################
# hist() returns one more break than it has bins. Dropping the first break
# (the 0, whose log is -Inf) and the last one leaves exactly one break per
# remaining density value, so the mask below indexes both vectors alike.
g.breaks <- g.hist$breaks[-c(1, length(g.hist$breaks))]
g.probs <- g.hist$density[-1]
# Need to clean up probabilities that are 0
nz.probs.mask <- g.probs != 0
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
#plot(log(g.breaks.clean), log(g.probs.clean))
g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
summary(g.fit)
# The fitted line is log(p) = intercept + slope * log(k); the curve drawn
# below is k^(-alpha), so alpha is the negated slope.
alpha.LM <- -coef(g.fit)[2]
lines(g.seq, g.seq^(-alpha.LM), col = "#E66100", lty = 3)

################# Max-Log-Likelihood #################
# NOTE(review): this estimator is evaluated on the retained bin edges, not on
# the individual degrees in g.vec - confirm that is what is intended.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4)

# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
# Degree Distribution
# Professor: Dr.
# McKinney, Spring 2022
# Noah Schrick - 1492657

library(igraph)
library(igraphdata)

data(yeast)
g <- yeast
g.netname <- "Yeast"

################# Set up Work #################
g.vec <- degree(g)
g.hist <- hist(g.vec, freq = FALSE,
               main = paste("Histogram of the", g.netname, " Network"))
legend("topright",
       c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
       lty = c(1, 2, 3, 4),
       col = c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))

g.mean <- mean(g.vec)
g.seq <- 0:max(g.vec)  # x-axis

################# Guessing Alpha #################
alpha.guess <- 1.5
lines(g.seq, g.seq^(-alpha.guess), col = "#40B0A6", lty = 1, lwd = 3)

################# Poisson #################
g.pois <- dpois(g.seq, g.mean, log = FALSE)
lines(g.seq, g.pois, col = "#006CD1", lty = 2)

################# Linear model: Least-Squares Fit #################
# hist() returns one more break than it has bins. Dropping the first break
# (the 0, whose log is -Inf) and the last one leaves exactly one break per
# remaining density value, so the mask below indexes both vectors alike.
g.breaks <- g.hist$breaks[-c(1, length(g.hist$breaks))]
g.probs <- g.hist$density[-1]
# Need to clean up probabilities that are 0
nz.probs.mask <- g.probs != 0
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
#plot(log(g.breaks.clean), log(g.probs.clean))
g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
summary(g.fit)
# The fitted line is log(p) = intercept + slope * log(k); the curve drawn
# below is k^(-alpha), so alpha is the negated slope.
alpha.LM <- -coef(g.fit)[2]
lines(g.seq, g.seq^(-alpha.LM), col = "#E66100", lty = 3)

################# Max-Log-Likelihood #################
# NOTE(review): this estimator is evaluated on the retained bin edges, not on
# the individual degrees in g.vec - confirm that is what is intended.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4)

# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
# Degree Distribution
# Professor: Dr.
# McKinney, Spring 2022
# Noah Schrick - 1492657

library(igraph)
library(igraphdata)

data(yeast)
g <- yeast
g.netname <- "Yeast"

################# Set up Work #################
g.vec <- degree(g)
g.hist <- hist(g.vec, freq = FALSE,
               main = paste("Histogram of the", g.netname, " Network"))
legend("topright",
       c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
       lty = c(1, 2, 3, 4),
       col = c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))

g.mean <- mean(g.vec)
g.seq <- 0:max(g.vec)  # x-axis

################# Guessing Alpha #################
alpha.guess <- 1.5
lines(g.seq, g.seq^(-alpha.guess), col = "#40B0A6", lty = 1, lwd = 3)

################# Poisson #################
g.pois <- dpois(g.seq, g.mean, log = FALSE)
lines(g.seq, g.pois, col = "#006CD1", lty = 2, lwd = 3)

################# Linear model: Least-Squares Fit #################
# hist() returns one more break than it has bins. Dropping the first break
# (the 0, whose log is -Inf) and the last one leaves exactly one break per
# remaining density value, so the mask below indexes both vectors alike.
g.breaks <- g.hist$breaks[-c(1, length(g.hist$breaks))]
g.probs <- g.hist$density[-1]
# Need to clean up probabilities that are 0
nz.probs.mask <- g.probs != 0
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
#plot(log(g.breaks.clean), log(g.probs.clean))
g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
summary(g.fit)
# The fitted line is log(p) = intercept + slope * log(k); the curve drawn
# below is k^(-alpha), so alpha is the negated slope.
alpha.LM <- -coef(g.fit)[2]
lines(g.seq, g.seq^(-alpha.LM), col = "#E66100", lty = 3, lwd = 3)

################# Max-Log-Likelihood #################
# NOTE(review): this estimator is evaluated on the retained bin edges, not on
# the individual degrees in g.vec - confirm that is what is intended.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4, lwd = 3)

# Interactive inspection of the network and of the fitted values
plot(yeast)
hist(g.vec)
g.pois
g.mean
alpha.LM
alpha.ML
degree(g)
sort(degree(g))
sort(degree(g), decreasing = FALSE)
sort(degree(g), decreasing = TRUE)
head(sort(degree(g), decreasing = TRUE))
sd(degree(g))
tail(sort(degree(g), decreasing = TRUE))
plot(log(g.breaks.clean), log(g.probs.clean))

# Homework 4 for the University of Tulsa's CS-7863 Network Theory Course
# Degree Distribution
# Professor: Dr.
# McKinney, Spring 2022
# Noah Schrick - 1492657

library(igraph)
library(igraphdata)

data(yeast)
g <- yeast
g.netname <- "Yeast"

################# Set up Work #################
g.vec <- degree(g)
g.hist <- hist(g.vec, freq = FALSE,
               main = paste("Histogram of the", g.netname, " Network"))
legend("topright",
       c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"),
       lty = c(1, 2, 3, 4),
       col = c("#40B0A6", "#006CD1", "#E66100", "#D35FB7"))

g.mean <- mean(g.vec)
g.seq <- 0:max(g.vec)  # x-axis

################# Guessing Alpha #################
alpha.guess <- 1.5
lines(g.seq, g.seq^(-alpha.guess), col = "#40B0A6", lty = 1, lwd = 3)

################# Poisson #################
g.pois <- dpois(g.seq, g.mean, log = FALSE)
lines(g.seq, g.pois, col = "#006CD1", lty = 2, lwd = 3)

################# Linear model: Least-Squares Fit #################
# hist() returns one more break than it has bins. Dropping the first break
# (the 0, whose log is -Inf) and the last one leaves exactly one break per
# remaining density value, so the mask below indexes both vectors alike.
g.breaks <- g.hist$breaks[-c(1, length(g.hist$breaks))]
g.probs <- g.hist$density[-1]
# Need to clean up probabilities that are 0
nz.probs.mask <- g.probs != 0
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
g.fit <- lm(log(g.probs.clean) ~ log(g.breaks.clean))
summary(g.fit)
# The fitted line is log(p) = intercept + slope * log(k); the curve drawn
# below is k^(-alpha), so alpha is the negated slope.
alpha.LM <- -coef(g.fit)[2]
lines(g.seq, g.seq^(-alpha.LM), col = "#E66100", lty = 3, lwd = 3)

################# Max-Log-Likelihood #################
# NOTE(review): this estimator is evaluated on the retained bin edges, not on
# the individual degrees in g.vec - confirm that is what is intended.
n <- length(g.breaks.clean)
kmin <- g.breaks.clean[1]
alpha.ML <- 1 + n / sum(log(g.breaks.clean / kmin))
alpha.ML
lines(g.seq, g.seq^(-alpha.ML), col = "#D35FB7", lty = 4, lwd = 3)

# Log-log view of the binned distribution. It is drawn only after every
# overlay above, so those lines() calls land on the histogram and not on
# this scatter plot.
plot(log(g.breaks.clean), log(g.probs.clean))
g.breaks.clean <- g.breaks[nz.probs.mask]
g.probs.clean <- g.probs[nz.probs.mask]
plot(log(g.breaks.clean), log(g.probs.clean))

# Bioconductor setup: install on demand, then attach.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
library(BiocManager)
if (!requireNamespace("Biostrings", quietly = TRUE)) {
  BiocManager::install("Biostrings")
}
# Biostrings is the package installed above, so attach it explicitly.
library(Biostrings)
# Kept from the original; nothing visible in this file uses snpStats.
library(snpStats)

# Lab 7 for the University of Tulsa's CS-6643 Bioinformatics Course
# PDB
# Professor: Dr. McKinney, Fall 2022
# Noah L.
# Schrick - 1492657

## Set Working Directory to file directory - RStudio approach
# Only attempted when running inside RStudio; otherwise the working directory
# is left untouched.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

#### Part A: Obtaining PDB - no supporting R Code

#### Part B: Visualize the 3D structure
## Install Rpdb and load the pdb
if (!requireNamespace("Rpdb", quietly = TRUE)) {
  install.packages("Rpdb")
}
library(Rpdb)
x <- read.pdb("1TGH.pdb")
natom(x)
visualize(x, type = "l")

# Colour lookup keyed by chain id: every plotted atom is coloured by the chain
# it belongs to, independent of atom order and of the water records that are
# stripped from the per-chain subsets below.
chain.colors <- c(B = "red", C = "green", A = "blue")

## Visualize the B and C chains
B_chain_pdb <- subset(x$atoms, x$atoms$chainid == "B")
C_chain_pdb <- subset(x$atoms, x$atoms$chainid == "C")
# remove water:
C_chain_pdb <- subset(C_chain_pdb, C_chain_pdb$resname != "HOH")
# visualize chains B and C
BC_chains_pdb <- subset(x$atoms,
                        x$atoms$chainid == "B" | x$atoms$chainid == "C")
color.vec <- unname(chain.colors[as.character(BC_chains_pdb$chainid)])
visualize(BC_chains_pdb, col = color.vec)
addResLab(BC_chains_pdb)

## Visualize B-C and A Chains
A_chain_pdb <- subset(x$atoms, x$atoms$chainid == "A")
# remove water
A_chain_pdb <- subset(A_chain_pdb, A_chain_pdb$resname != "HOH")
# visualize complex
BCA_chains_pdb <- subset(x$atoms,
                         x$atoms$chainid == "B" | x$atoms$chainid == "C" |
                           x$atoms$chainid == "A")
BCA.color.vec <- unname(chain.colors[as.character(BCA_chains_pdb$chainid)])
visualize(BCA_chains_pdb, col = BCA.color.vec)

#### Part C: Primary structure and DNA Palindromes
# get coordinates of C1' atoms of the C-chain DNA molecule
C_chain_pdb$resname
C_chain_resids <- unique(C_chain_pdb$resid)
C_chain_C1prime <- subset(C_chain_pdb, C_chain_pdb$elename == "C1'")
# get chain C DNA sequence: one C1' atom per residue, and the second character
# of each residue name is taken as the base letter
# (presumably names look like "DA"/"DC"/"DG"/"DT" - confirm against the file)
C_chain_sequence_messy <- C_chain_C1prime$resname
C_chain_sequence <- paste(substr(C_chain_sequence_messy, 2, 2), collapse = "")

## Find palindromes
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
library(BiocManager)
if (!requireNamespace("Biostrings", quietly = TRUE)) {
  BiocManager::install("Biostrings")
}
# DNAString() and findPalindromes() below come from Biostrings.
library(Biostrings)
# Kept from the original; nothing visible in this file uses snpStats.
library(snpStats)
C_chain_DNAString <- DNAString(C_chain_sequence)
dna.pals <- findPalindromes(C_chain_DNAString, min.armlength = 3,
                            max.looplength = 5, max.mismatch = 0)
dna.pals
visualize(x, type = "l")

#### Part D: Find the binding site
## Get Coordinates
C_chain_C1prime_coords <- coords(C_chain_C1prime)
dim(C_chain_C1prime_coords)
?coords
rownames(C_chain_C1prime_coords)
colnames(C_chain_C1prime_coords)
C_chain_C1prime_coords[1][1]
C_chain_C1prime

# get coordinates of CA atoms of the A-chain protein molecule
A_chain_sequence_3letter <- A_chain_pdb$resname
A_chain_resids <- unique(A_chain_pdb$resid)
A_chain_CA <- subset(A_chain_pdb, A_chain_pdb$elename == "CA")
A_chain_CA_coords <- coords(A_chain_CA)
dim(A_chain_CA_coords)

# create distance matrix between chains
#
# pair.dist(chain1, chain2)
#   chain1, chain2: row-indexable coordinate tables (one point per row).
#   Returns a matrix with nrow(chain1) rows and nrow(chain2) columns whose
#   [i, j] entry is dist() applied to row i of chain1 stacked on row j of
#   chain2.
pair.dist <- function(chain1, chain2) {
  row.dist <- function(i, j) {
    dist(rbind(chain1[i, ], chain2[j, ]))
  }
  # outer() hands over whole index vectors, hence the Vectorize() wrapper
  outer(seq_len(nrow(chain1)), seq_len(nrow(chain2)), Vectorize(row.dist))
}
prot2DNAdistMat <- pair.dist(A_chain_CA_coords, C_chain_C1prime_coords)
dim(prot2DNAdistMat)
rownames(prot2DNAdistMat)
prot2DNAdistMat[1]
prot2DNAdistMat
Vectorize
dim(A_chain_CA_coords)
colnames(A_chain_CA_coords)
rownames(A_chain_CA_coords)
A_chain_CA_coords[1]
A_chain_CA
nrow(A_chain_CA_coords)

# ij location of min in current matrix
# (one row per minimum; columns are the row and column index)
min_dist <- min(prot2DNAdistMat)
min_dist
min_ij <- which(prot2DNAdistMat == min_dist, arr.ind = TRUE)
min_ij
# min_ij holds row/column positions in prot2DNAdistMat: the row indexes the
# A-chain CA atoms and the column indexes the C-chain C1' atoms. Only the
# first minimum is used if several pairs tie.
closest.A <- min_ij[1, 1]
closest.C <- min_ij[1, 2]
A_chain_CA$resname[closest.A] # closest A-chain residue
strsplit(C_chain_sequence, "")[[1]][closest.C] # closest C-chain residue
?visualize

# color binding residues
CA_chains_pdb <- subset(x$atoms,
                        x$atoms$chainid == "C" | x$atoms$chainid == "A")
# Translate the matrix positions into the residue ids carried by the atoms
binding.A.resid <- A_chain_CA$resid[closest.A]
binding.C.resid <- C_chain_C1prime$resid[closest.C]

# binding.site.colors(atoms, a.resid, c.resid, c.col, a.col, a.hit.col,
#                     c.hit.col)
#   atoms:     atom table with chainid and resid columns (chains A and C)
#   a.resid:   residue id to highlight within chain A
#   c.resid:   residue id to highlight within the other chain
#   c.col, a.col:         base colour for non-A atoms / chain-A atoms
#   a.hit.col, c.hit.col: highlight colour for the two binding residues
#   Returns one colour per row of atoms.
binding.site.colors <- function(atoms, a.resid, c.resid,
                                c.col, a.col, a.hit.col, c.hit.col) {
  in.A <- atoms$chainid == "A"
  cols <- ifelse(in.A, a.col, c.col)
  # Match on chain as well as residue id so that an equal residue number in
  # the other chain is not highlighted by accident
  cols[in.A & atoms$resid == a.resid] <- a.hit.col
  cols[!in.A & atoms$resid == c.resid] <- c.hit.col
  cols
}

# Colour schemes tried in turn; the last one is the scene that gets exported
CA.color.vec <- binding.site.colors(CA_chains_pdb,
                                    binding.A.resid, binding.C.resid,
                                    "green", "blue", "purple", "purple")
visualize(CA_chains_pdb, col = CA.color.vec)
CA.color.vec <- binding.site.colors(CA_chains_pdb,
                                    binding.A.resid, binding.C.resid,
                                    "green", "blue", "purple", "red")
visualize(CA_chains_pdb, col = CA.color.vec)
CA.color.vec <- binding.site.colors(CA_chains_pdb,
                                    binding.A.resid, binding.C.resid,
                                    "green", "lightblue", "purple", "red")
visualize(CA_chains_pdb, col = CA.color.vec)
rgl.postscript("binding_site.pdf", "pdf", drawText = TRUE)

#### Part E: Palindromes in other organisms
## Load associated supportive libraries
if (!requireNamespace("seqinr", quietly = TRUE)) {
  install.packages("seqinr")
}
library(seqinr)

## Load in the fasta file as a string
myfasta <- read.fasta(file = "sequence.fasta", as.string = TRUE)
myfasta
# Keep only the first record's sequence string
myfasta <- myfasta[[1]][1]
myfasta
fasta_DNAString <- DNAString(myfasta)
fasta.dna.pals <- findPalindromes(fasta_DNAString, min.armlength = 5)
fasta.dna.pals

## Reverse and complement, following the "rc" function from insect.
## Its logic is written out below, so the insect package itself is not needed.

# Look at the palindrome views in reverse order
fasta.dna.pals.rev <- rev(fasta.dna.pals)
fasta.dna.pals.rev
fasta.dna.pals.rev[1]
class(fasta.dna.pals.rev)
?Biostrings
toString(fasta.dna.pals.rev)
toString(fasta.dna.pals)
?rev

# rev() reverses the elements of a vector, and toString() gives back a single
# string, so the string is split into one-character elements before reversing.
fasta.dna.pals.rev <- rev(strsplit(toString(fasta.dna.pals), split = "")[[1]])
paste(fasta.dna.pals.rev, collapse = "")
fasta.dna.pals.rev

# Complement: drop anything that is not A/C/G/T, then map each base through
# the paired lookup vectors.
dnachars <- strsplit("ACGT", split = "")[[1]]
comps <- strsplit("TGCA", split = "")[[1]]
fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars]
fasta.dna.pals.rc
fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)]
fasta.dna.pals.rc
fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "")
fasta.dna.pals.rc

## Reverse and complement
# Convert pal to str, split on
# each char, then rev. The characters are kept split here because the
# complement lookup below works element by element.
fasta.dna.pals.rev <- rev(strsplit(toString(fasta.dna.pals), split = "")[[1]])
fasta.dna.pals.rev

# From the rc function in the insect package. Modified for these variables
dnachars <- strsplit("ACGT", split = "")[[1]]
comps <- strsplit("TGCA", split = "")[[1]]
# Keep only A/C/G/T, complement each base, then join into a single string
fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars]
fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)]
fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "")

toString(fasta.dna.pals)
fasta.dna.pals.rev
fasta.dna.pals.rc

# A palindrome should equal its own reverse complement.
# NOTE(review): toString() is applied to the whole findPalindromes() result;
# if it holds more than one palindrome the joined string presumably contains
# separators and this comparison cannot be TRUE - confirm there is only one.
fasta.dna.pals == fasta.dna.pals.rc
toString(fasta.dna.pals) == fasta.dna.pals.rc