# Lab 6 for the University of Tulsa's CS-6643 Bioinformatics Course # GWAS # Professor: Dr. McKinney, Fall 2022 # Noah L. Schrick - 1492657 ## Set Working Directory to file directory - RStudio approach setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) #### Part 0: PLINK if (!require("BiocManager")) install.packages("BiocManager") library(BiocManager) if (!require("snpStats")) BiocManager::install("snpStats") library(snpStats) ex.data <- read.pedfile(file="extra.ped", snps="extra.map") ex.data$fam phenotype <- ex.data$fam$affected-1 # change pheno from 1/2 to 0/1 genotypes <- ex.data$genotypes # encoded as AA/AB/BB snp.ids <- as.character(ex.data$map$snp.names) genotypes.df <- data.frame(as(genotypes, "character")) colnames(genotypes.df) <- snp.ids # observed contingency table for SNP rs630969 table(phenotype,genotypes.df$rs630969, dnn=c("phenotype","genotype")) # dnn dimension names of table dim(genotypes.df) #### Part A: Chi-Square Test # creates list of observed contingency tables for all SNPs # sapply acts on each column of genotypes.df observed.tables.list <- sapply(genotypes.df, function(x) table(phenotype,x,dnn=c("phenotype","genotype"))) test.table <- observed.tables.list$rs634228 genoMarg.vec <- colSums(test.table) # margin vector phenoMarg.vec <- rowSums(test.table) # margin vector totalSubj <- sum(genoMarg.vec) # total subjects expect.test <- outer(phenoMarg.vec,genoMarg.vec/totalSubj,'*') ## Fisher Test # Fisher exact test (chi-square test) for all SNPs fish_fn <- function(i){ cbind(snp.ids[i], fisher.test(observed.tables.list[[i]])$p.value) } # apply fisher exact test to all SNPs fish.df <- data.frame(t(sapply(1:ncol(genotypes.df), fish_fn))) colnames(fish.df) <- c("rs", "p_value") # sort SNPs by Fisher exact p-value if (!require("dplyr")) install.packages("dplyr") library(dplyr) fish.results <- fish.df %>% mutate_at("p_value", as.character) %>% mutate_at("p_value", as.numeric) %>% arrange(p_value) print(fish.results)