Finalizing Part A: Chi-Square tests and fisher tests

This commit is contained in:
Noah L. Schrick 2022-10-20 16:41:05 -05:00
parent 4fbbeaeac7
commit cc0c499840
3 changed files with 34 additions and 2 deletions

View File

@ -1 +1 @@
,noah,NovaArchSys,20.10.2022 16:12,file:///home/noah/.config/libreoffice/4;
,noah,NovaArchSys,20.10.2022 16:40,file:///home/noah/.config/libreoffice/4;

View File

@ -22,3 +22,35 @@ colnames(genotypes.df) <- snp.ids
# observed contingency table for SNP rs630969
table(phenotype, genotypes.df$rs630969,
      dnn = c("phenotype", "genotype")) # dnn: dimension names of the table
dim(genotypes.df)
#### Part A: Chi-Square Test
# Build one observed phenotype-by-genotype contingency table per SNP
# (one per column of genotypes.df).
# lapply() is used instead of sapply() because sapply() collapses the result
# into a matrix whenever every table happens to have the same dimensions,
# which would break the by-name and by-index list lookups used later.
observed.tables.list <- lapply(genotypes.df, function(x) {
  table(phenotype, x, dnn = c("phenotype", "genotype"))
})
# Worked example for a single SNP; [[ ]] matches the name exactly
# (no partial matching as with $).
test.table <- observed.tables.list[["rs634228"]]
genoMarg.vec <- colSums(test.table)  # genotype (column) margin totals
phenoMarg.vec <- rowSums(test.table) # phenotype (row) margin totals
totalSubj <- sum(genoMarg.vec)       # total subjects
# Expected cell counts under independence: row total * column proportion
expect.test <- outer(phenoMarg.vec, genoMarg.vec / totalSubj, "*")
## Fisher Test
# Fisher exact test (an exact alternative to the chi-square test) for all SNPs
# Run Fisher's exact test on the contingency table of the i-th SNP.
#   i: index into both snp.ids and observed.tables.list (read from the
#      enclosing environment).
# Returns a 1 x 2 matrix: the SNP id and the test's p-value; cbind() coerces
# the pair to a common type.
fish_fn <- function(i) {
  snp_table <- observed.tables.list[[i]]
  p_val <- fisher.test(snp_table)$p.value
  # deparse.level = 0 keeps the result free of column names
  cbind(snp.ids[i], p_val, deparse.level = 0)
}
# Apply the Fisher exact test to every SNP (one fish_fn call per column of
# genotypes.df) and collect the results as one row per SNP.
# seq_len() is used instead of 1:ncol() so a zero-column input does not
# produce the index sequence c(1, 0).
fish.df <- data.frame(t(sapply(seq_len(ncol(genotypes.df)), fish_fn)))
colnames(fish.df) <- c("rs", "p_value")
# sort SNPs by Fisher exact p-value
# requireNamespace() only checks whether dplyr is installed; library() then
# attaches it and stops with an error if the installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)
# fish_fn() combines the SNP id and p-value with cbind(), so p_value is not
# numeric here; going through as.character() first also handles the case
# where data.frame() turned the column into a factor.
fish.results <- fish.df %>%
  mutate(p_value = as.numeric(as.character(p_value))) %>%
  arrange(p_value)
print(fish.results)

Binary file not shown.