513 lines
19 KiB
R
513 lines
19 KiB
R
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2)
|
|
################# Linear model: Least-Squares Fit #################
|
|
#g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.breaks <- g.hist$breaks # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.breaks[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin)
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
alpha.LM
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4)
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
#plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
|
|
plot(yeast)
|
|
hist(yeast)
|
|
hist(g.vec)
|
|
g.pois
|
|
g.mean
|
|
alpha.LM
|
|
alpha.ML
|
|
degree(g)
|
|
sort(degree(g))
|
|
sort(degree(g),decreasing=FALSE)
|
|
sort(degree(g),decreasing=F)
|
|
sort(degree(g),decreasing=false)
|
|
sort(degree(g), decreasing = TRUE)
|
|
head(sort(degree(g), decreasing = TRUE))
|
|
stddev(degree(g))
|
|
sd(degree(g))
|
|
tail(sort(degree(g), decreasing = TRUE))
|
|
plot(log(g.breaks.clean), log(g.probs.clean))
|
|
# Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Degree Distribution
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
library(igraph)
|
|
library(igraphdata)
|
|
data(yeast)
|
|
g <- yeast
|
|
g.netname <- "Yeast"
|
|
################# Set up Work #################
|
|
g.vec <- degree(g)
|
|
g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname,
|
|
" Network"))
|
|
legend("topright", c("Guess", "Poisson", "Least-Squares Fit",
|
|
"Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6",
|
|
"#006CD1", "#E66100", "#D35FB7"))
|
|
g.mean <- mean(g.vec)
|
|
g.seq <- 0:max(g.vec) # x-axis
|
|
################# Guessing Alpha #################
|
|
alpha.guess <- 1.5
|
|
lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3)
|
|
################# Poisson #################
|
|
g.pois <- dpois(g.seq, g.mean, log=F)
|
|
lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3)
|
|
################# Linear model: Least-Squares Fit #################
|
|
g.breaks <- g.hist$breaks[-c(1)] # remove 0
|
|
g.probs <- g.hist$density[-1] # make lengths match
|
|
# Need to clean up probabilities that are 0
|
|
nz.probs.mask <- g.probs!=0
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean))
|
|
summary(g.fit)
|
|
alpha.LM <- coef(g.fit)[2]
|
|
lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3)
|
|
################# Max-Log-Likelihood #################
|
|
n <- length(g.breaks.clean)
|
|
kmin <- g.breaks.clean[1]
|
|
alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin))
|
|
alpha.ML
|
|
lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3)
|
|
plot(log(g.breaks.clean), log(g.probs.clean))
|
|
g.breaks.clean <- g.breaks[nz.probs.mask]
|
|
g.probs.clean <- g.probs[nz.probs.mask]
|
|
plot(log(g.breaks.clean), log(g.probs.clean))
|
|
## Turn into function to repeat for all SNPs
|
|
LR.fn <- function(i){
|
|
lr <- glm(pheno.factor~genotypes.df[[i]],family=binomial)
|
|
td.lr <- tidy(lr)
|
|
pval_vec <- td.lr$p.value # vector of $p.value from td.lr
|
|
coef_vec <- td.lr$estimate # vector of $estimate
|
|
cbind(snp.ids[i], coef_vec[1], coef_vec[2], coef_vec[3], pval_vec[1], pval_vec[2], pval_vec[3])
|
|
}
|
|
# apply Logistic Regression model to all SNPs
|
|
LRresults.df <- data.frame(t(sapply(1:ncol(genotypes.df), LR.fn)))
|
|
# Lab 6 for the University of Tulsa's CS-6643 Bioinformatics Course
|
|
# GWAS
|
|
# Professor: Dr. McKinney, Fall 2022
|
|
# Noah L. Schrick - 1492657
|
|
## Set Working Directory to file directory - RStudio approach
|
|
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
|
#### Part 0: PLINK
|
|
if (!require("BiocManager")) install.packages("BiocManager")
|
|
library(BiocManager)
|
|
if (!require("snpStats")) BiocManager::install("snpStats")
|
|
library(snpStats)
|
|
ex.data <- read.pedfile(file="extra.ped", snps="extra.map")
|
|
ex.data$fam
|
|
phenotype <- ex.data$fam$affected-1 # change pheno from 1/2 to 0/1
|
|
genotypes <- ex.data$genotypes # encoded as AA/AB/BB
|
|
snp.ids <- as.character(ex.data$map$snp.names)
|
|
genotypes.df <- data.frame(as(genotypes, "character"))
|
|
colnames(genotypes.df) <- snp.ids
|
|
# observed contingency table for SNP rs630969
|
|
table(phenotype,genotypes.df$rs630969,
|
|
dnn=c("phenotype","genotype")) # dnn dimension names of table
|
|
dim(genotypes.df)
|
|
#### Part A: Chi-Square Test
|
|
# creates list of observed contingency tables for all SNPs
|
|
# sapply acts on each column of genotypes.df
|
|
observed.tables.list <- sapply(genotypes.df, function(x)
|
|
table(phenotype,x,dnn=c("phenotype","genotype")))
|
|
test.table <- observed.tables.list$rs634228
|
|
genoMarg.vec <- colSums(test.table) # margin vector
|
|
phenoMarg.vec <- rowSums(test.table) # margin vector
|
|
totalSubj <- sum(genoMarg.vec) # total subjects
|
|
expect.test <- outer(phenoMarg.vec,genoMarg.vec/totalSubj,'*')
|
|
## Fisher Test
|
|
# Fisher exact test (chi-square test) for all SNPs
|
|
fish_fn <- function(i){
|
|
cbind(snp.ids[i], fisher.test(observed.tables.list[[i]])$p.value)
|
|
}
|
|
# apply fisher exact test to all SNPs
|
|
fish.df <- data.frame(t(sapply(1:ncol(genotypes.df), fish_fn)))
|
|
colnames(fish.df) <- c("rs", "p_value")
|
|
# sort SNPs by Fisher exact p-value
|
|
if (!require("dplyr")) install.packages("dplyr")
|
|
library(dplyr)
|
|
fish.results <- fish.df %>%
|
|
mutate_at("p_value", as.character) %>%
|
|
mutate_at("p_value", as.numeric) %>%
|
|
arrange(p_value)
|
|
print(fish.results)
|
|
#### Part B: Logistic regression with genotypes
|
|
if (!require("ggplot2")) BiocManager::install("ggplot2")
|
|
library(ggplot2)
|
|
i<-8
|
|
A1<-ex.data$map$allele.1[i]
|
|
A2<-ex.data$map$allele.2[i]
|
|
geno.labels <- c(paste(A1,A1,sep=""),paste(A1,A2,sep=""),paste(A2,A2,sep=""))
|
|
## Plot with ggplot2
|
|
# data from the one SNP
|
|
oneSNP.df <- data.frame(cbind(genotypes.df[[i]],as.numeric(phenotype)))
|
|
colnames(oneSNP.df) <- c("genotypes","phenotypes")
|
|
lr.plot <- ggplot(oneSNP.df, aes(x=genotypes, y=phenotypes)) +
|
|
geom_point(position = position_jitter(w = 0.1, h = 0.1)) +
|
|
# stat_smooth plots the probability based on the model
|
|
stat_smooth(method="glm", method.args = list(family = "binomial")) #+
|
|
#xlim(geno.labels) + ggtitle(snp.ids[i])
|
|
print(lr.plot)
|
|
## Fit a logistic regression model of phenotype with SNP in the 8th column
|
|
if (!require("broom")) install.packages("broom")
|
|
library(broom) # for tidy function
|
|
pheno.factor <- factor(phenotype,labels=c(0,1))
|
|
i<-8
|
|
lr <- glm(pheno.factor~genotypes.df[[i]],family=binomial)
|
|
td.lr <- tidy(lr)
|
|
pval_vec <- td.lr$p.value # vector of $p.value from td.lr
|
|
coef_vec <- td.lr$estimate # vector of $estimate
|
|
cbind(snp.ids[i], coef_vec[1], coef_vec[2], coef_vec[3], pval_vec[1], pval_vec[2], pval_vec[3])
|
|
## Turn into function to repeat for all SNPs
|
|
LR.fn <- function(i){
|
|
lr <- glm(pheno.factor~genotypes.df[[i]],family=binomial)
|
|
td.lr <- tidy(lr)
|
|
pval_vec <- td.lr$p.value # vector of $p.value from td.lr
|
|
coef_vec <- td.lr$estimate # vector of $estimate
|
|
cbind(snp.ids[i], coef_vec[1], coef_vec[2], coef_vec[3], pval_vec[1], pval_vec[2], pval_vec[3])
|
|
}
|
|
# apply Logistic Regression model to all SNPs
|
|
LRresults.df <- data.frame(t(sapply(1:ncol(genotypes.df), LR.fn)))
|
|
# add column names to results data frame
|
|
colnames(LRresults.df) <- c("rs", "AAintercept", "ABcoef", "BBcoef", "AA.pval", "AB.pval", "BB.pval")
|
|
# The following sorts LR results by the p-value of the BB
|
|
# homozygous coefficient. tidy made $p_value a factor and when you try to
|
|
# convert directly to numeric (as.numeric) turns factors into integer and
|
|
# this messes up sorting especially with scientific notation
|
|
lr.results.sorted <- LRresults.df %>%
|
|
mutate_at("BB.pval", as.character) %>% # convert to char before numeric
|
|
mutate_at("BB.pval", as.numeric) %>% # convert to numeric for arrange
|
|
arrange(BB.pval) # sort
|
|
as.matrix(lr.results.sorted %>% pull(rs,BB.pval))
|
|
fish.df
|
|
fish.results
|
|
fish.results$rs
|
|
lr.results.sorted
|
|
lr.results.sorted$rs
|
|
table(fish.results$rs, lr.results.sorted$rs)
|
|
expand.grid(fish=fish.results$rs, lr=lr.results.sorted$rs)
|
|
CJ(fish=fish.results$rs, lr=lr.results.sorted$rs)
|
|
table(fish=fish.results$rs, lr=lr.results.sorted$rs)
|
|
as.table(fish=fish.results$rs, lr=lr.results.sorted$rs)
|
|
setNames(fish.results$rs, lr.results.sorted$rs)
|
|
as.table(setNames(fish.results$rs, lr.results.sorted$rs))
|
|
data.frame(fish.results$rs, lr.results.sorted$rs)
|
|
tmp <- data.frame(fish.results$rs, lr.results.sorted$rs)
|
|
?plot
|
|
length(fish.results$rs)
|
|
length(lr.results.sorted$rs)
|
|
?plot(x=1:length(fish.results$rs))
|
|
?plot(y=1:length(fish.results$rs), x=fish.results$rs)
|
|
plot(y=1:length(fish.results$rs), x=fish.results$rs)
|
|
plot(fish.results)
|
|
plot(fish.results$rs)
|
|
fish.results$rs
|
|
class(fish.results$rs)
|
|
as.vector(fish.results$rs)
|
|
plot(as.vector(fish.results$rs))
|
|
as.data.frame(fish.results$rs)
|
|
plot(as.data.frame(fish.results$rs))
|
|
ggplot(as.data.frame(fish.results$rs))
|
|
ggplot(as.data.frame(fish.results$rs))
|
|
plot(as.data.frame(fish.results$rs))
|
|
tmpy<-1:length(fish.results$rs)
|
|
tmpy
|
|
yvals <- 1:length(fish.results$rs)
|
|
xfish <- fish.results$rs
|
|
xfish
|
|
which(fish.results$rs == "rs17786052")
|
|
which(fish.results$rs == "rs17786052")[[1]]
|
|
snp.ids
|
|
snp.ids[i]
|
|
match(fish.results$rs, snp.ids)
|
|
yvals <- 1:length(fish.results$rs)
|
|
xvals <- snp.ids
|
|
fishvals <- match(fish.results$rs, snp.ids)
|
|
lrvals <- match(lr.results.sorted$rs, snp.ids)
|
|
plot(xvals, fishvals)
|
|
fishvals
|
|
plot(xlimxvals, fishvals, type="l")
|
|
plot(xvals, fishvals, type="l")
|
|
plot(1:17, yvals, type=l, xaxt="none", xlab="snp")
|
|
plot(1:17, yvals, type="l", xaxt="none", xlab="snp")
|
|
axis(1, axTicks(1), labels=snp.ids)
|
|
snp.ids
|
|
axis(labels=snp.ids)
|
|
?axis
|
|
axis(labels=snp.ids, side=1)
|
|
axis(labels=snp.ids, side=1, at=1)
|
|
axis(labels=snp.ids, side=1, at=17)
|
|
axis(labels=snp.ids, side=1, at=17)
|
|
length(snp.ids)
|
|
length(snp.ids[1])
|
|
xlabels <- c(snp.ids)
|
|
xlabels
|
|
axis(xlabels=snp.ids)
|
|
axis(labels=xlabels)
|
|
axis(labels=xlabels, side=1)
|
|
axis(side = 1, at = 1:length(fish.results$rs), labels=snp.ids)
|
|
plot(1:17, fishvals, type="l", xaxt="none", xlab="snp", ylab="rank")
|
|
axis(side = 1, at = 1:length(fish.results$rs), labels=snp.ids)
|
|
lines(1:length(lr.results.sorted$rs), lrvals, type="l", col="red", lwd =2, lty=1)
|
|
plot(1:length(fish.results$rs), fishvals, type="l", xaxt="none",
|
|
xlab="snp", ylab="rank", lty=1, col=1)
|
|
axis(side = 1, at = 1:length(fish.results$rs), labels=snp.ids)
|
|
lines(1:length(lr.results.sorted$rs), lrvals, type="l", col=2, lty=2)
|
|
legend(x="topright", legend=c("Chi-Square Fisher Test", "Logistic Regression"),
|
|
lty=c(1,2), col=c(1,2))
|
|
plot(1:length(fish.results$rs), fishvals, type="l", xaxt="none",
|
|
xlab="snp", ylab="rank", lty=1, col=1)
|
|
axis(side = 1, at = 1:length(fish.results$rs), labels=snp.ids)
|
|
lines(1:length(lr.results.sorted$rs), lrvals, type="l", col=2, lty=2, lwd=2)
|
|
legend(x="topright", legend=c("Chi-Square Fisher Test", "Logistic Regression"),
|
|
lty=c(1,2), col=c(1,2))
|