# Lab 7 for the University of Tulsa's CS-6643 Bioinformatics Course # PDB # Professor: Dr. McKinney, Fall 2022 # Noah L. Schrick - 1492657 ## Set Working Directory to file directory - RStudio approach setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #### Part A: Obtaining PDB - no supporting R Code #### Part B: Visualize the 3D structure ## Install Rpdb and load the pdb if (!require("Rpdb")) install.packages("Rpdb") library(Rpdb) x<-read.pdb("1TGH.pdb") natom(x) visualize(x,type="l") ## Visualize the B and C chains B_chain_pdb <- subset(x$atoms, x$atoms$chainid=="B") C_chain_pdb <- subset(x$atoms, x$atoms$chainid=="C") # remove water: C_chain_pdb <- subset(C_chain_pdb,C_chain_pdb$resname!="HOH") # visualize chains B and C BC_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | x$atoms$chainid=="C") color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb))) visualize(BC_chains_pdb,col=color.vec) addResLab(BC_chains_pdb) ## Visualize B-C and A Chains A_chain_pdb <- subset(x$atoms, x$atoms$chainid=="A") # remove water A_chain_pdb <- subset(A_chain_pdb, A_chain_pdb$resname!="HOH") # visualize complex complex BCA_chains_pdb <- subset(x$atoms, x$atoms$chainid=="B" | x$atoms$chainid=="C" | x$atoms$chainid=="A") BCA.color.vec <- c(rep("red",natom(B_chain_pdb)),rep("green",natom(C_chain_pdb)),rep("blue",natom(A_chain_pdb))) visualize(BCA_chains_pdb,col=BCA.color.vec) #### Part C: Primary structure and DNA Palindromes # get coordinates of C1' atoms of the C-chain DNA molecule C_chain_pdb$resname C_chain_resids<-unique(C_chain_pdb$resid) C_chain_C1prime <- subset(C_chain_pdb, C_chain_pdb$elename=="C1'") # get chain C DNA sequence C_chain_sequence_messy <- C_chain_C1prime$resname C_chain_sequence <- paste(sapply(C_chain_sequence_messy,function(x) {unlist(strsplit(x,""))[2]}),collapse = "") ## Find palindromes if (!require("BiocManager")) install.packages("BiocManager") library(BiocManager) if (!require("Biostrings")) BiocManager::install("Biostrings") library(snpStats) C_chain_DNAString <- DNAString(C_chain_sequence) dna.pals <- findPalindromes(C_chain_DNAString, min.armlength=3, max.looplength=5, max.mismatch = 0) #### Part D: Find the binding site ## Get Coordinates C_chain_C1prime_coords <- coords(C_chain_C1prime) dim(C_chain_C1prime_coords) # get coordinates of CA atoms of the A-chain protein molecule A_chain_sequence_3letter <- A_chain_pdb$resname A_chain_resids<-unique(A_chain_pdb$resid) A_chain_CA <- subset(A_chain_pdb, A_chain_pdb$elename=="CA") A_chain_CA_coords <- coords(A_chain_CA) # create distance matrix between chains pair.dist <- function(chain1,chain2){outer(1:nrow(chain1),1:nrow(chain2),Vectorize(function(i,j) {dist(rbind(chain1[i,],chain2[j,]))}))} prot2DNAdistMat <- pair.dist(A_chain_CA_coords,C_chain_C1prime_coords) dim(prot2DNAdistMat) # ij location of min in current matrix (2-elt vector) min_dist <- min(prot2DNAdistMat) min_dist min_ij <- which(prot2DNAdistMat == min_dist, arr.ind = TRUE) min_ij A_chain_sequence_3letter[min_ij[1]] # closest A-chain residue strsplit(C_chain_sequence,"")[[1]][min_ij[2]] # closest C-chain residue # color binding residues CA_chains_pdb <- subset(x$atoms, x$atoms$chainid == "C" | x$atoms$chainid == "A") CA.color.vec <- c(rep("green", natom(C_chain_pdb)), rep("lightblue", natom(A_chain_pdb))) CA.color.vec[which(CA_chains_pdb$resid == min_ij[1])] <- "purple" CA.color.vec[which(CA_chains_pdb$resid == min_ij[2])] <- "red" visualize(CA_chains_pdb, col=CA.color.vec) rgl.postscript("binding_site.pdf", "pdf", drawText=TRUE) #### Part E: Palindromes in other organisms ## Load associated supportive libraries if (!require("seqinr")) install.packages("seqinr") library(seqinr) ## Load in the fasta file as a string myfasta <- read.fasta(file="sequence.fasta", as.string= TRUE)[[1]][1] fasta_DNAString <- DNAString(myfasta) fasta.dna.pals <- findPalindromes(fasta_DNAString, min.armlength=5) toString(fasta.dna.pals) ## Reverse and complement # Convert pal to str, split on each char, rev. Leave broken up for %in% fasta.dna.pals.rev <- rev(strsplit(toString(fasta.dna.pals), split = "")[[1]]) fasta.dna.pals.rev # From the rc function in the insect package. Modified for these variables dnachars <- strsplit("ACGT", split = "")[[1]] comps <- strsplit("TGCA", split = "")[[1]] fasta.dna.pals.rc <- fasta.dna.pals.rev[fasta.dna.pals.rev %in% dnachars] fasta.dna.pals.rc <- dnachars[match(fasta.dna.pals.rc, comps)] fasta.dna.pals.rc <- paste0(fasta.dna.pals.rc, collapse = "") fasta.dna.pals.rc toString(fasta.dna.pals) == fasta.dna.pals.rc