57 lines
2.0 KiB
R
57 lines
2.0 KiB
R
# Lab 6 for the University of Tulsa's CS-6643 Bioinformatics Course
|
|
# GWAS
|
|
# Professor: Dr. McKinney, Fall 2022
|
|
# Noah L. Schrick - 1492657
|
|
|
|
## Set Working Directory to file directory - RStudio approach
|
|
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
|
|
|
#### Part 0: PLINK
|
|
if (!require("BiocManager")) install.packages("BiocManager")
|
|
library(BiocManager)
|
|
if (!require("snpStats")) BiocManager::install("snpStats")
|
|
library(snpStats)
|
|
|
|
ex.data <- read.pedfile(file="extra.ped", snps="extra.map")
|
|
ex.data$fam
|
|
phenotype <- ex.data$fam$affected-1 # change pheno from 1/2 to 0/1
|
|
genotypes <- ex.data$genotypes # encoded as AA/AB/BB
|
|
snp.ids <- as.character(ex.data$map$snp.names)
|
|
genotypes.df <- data.frame(as(genotypes, "character"))
|
|
colnames(genotypes.df) <- snp.ids
|
|
# observed contingency table for SNP rs630969
|
|
table(phenotype,genotypes.df$rs630969,
|
|
dnn=c("phenotype","genotype")) # dnn dimension names of table
|
|
dim(genotypes.df)
|
|
|
|
#### Part A: Chi-Square Test
|
|
# creates list of observed contingency tables for all SNPs
|
|
# sapply acts on each column of genotypes.df
|
|
observed.tables.list <- sapply(genotypes.df, function(x)
|
|
table(phenotype,x,dnn=c("phenotype","genotype")))
|
|
|
|
test.table <- observed.tables.list$rs634228
|
|
genoMarg.vec <- colSums(test.table) # margin vector
|
|
phenoMarg.vec <- rowSums(test.table) # margin vector
|
|
totalSubj <- sum(genoMarg.vec) # total subjects
|
|
expect.test <- outer(phenoMarg.vec,genoMarg.vec/totalSubj,'*')
|
|
|
|
## Fisher Test
|
|
# Fisher exact test (chi-square test) for all SNPs
|
|
fish_fn <- function(i){
|
|
cbind(snp.ids[i], fisher.test(observed.tables.list[[i]])$p.value)
|
|
}
|
|
|
|
# apply fisher exact test to all SNPs
|
|
fish.df <- data.frame(t(sapply(1:ncol(genotypes.df), fish_fn)))
|
|
colnames(fish.df) <- c("rs", "p_value")
|
|
|
|
# sort SNPs by Fisher exact p-value
|
|
if (!require("dplyr")) install.packages("dplyr")
|
|
library(dplyr)
|
|
fish.results <- fish.df %>%
|
|
mutate_at("p_value", as.character) %>%
|
|
mutate_at("p_value", as.numeric) %>%
|
|
arrange(p_value)
|
|
print(fish.results)
|