g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.breaks[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1,2)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.breaks[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1,2,3)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.breaks[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.breaks[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# #g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.breaks <- g.hist$breaks # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.breaks[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean)/kmin) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) alpha.LM # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=5) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] #plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3) plot(yeast) hist(yeast) hist(g.vec) g.pois g.mean alpha.LM alpha.ML degree(g) sort(degree(g)) sort(degree(g),decreasing=FALSE) sort(degree(g),decreasing=F) sort(degree(g),decreasing=false) sort(degree(g), decreasing = TRUE) head(sort(degree(g), decreasing = TRUE)) stddev(degree(g)) sd(degree(g)) tail(sort(degree(g), decreasing = TRUE)) plot(log(g.breaks.clean), log(g.probs.clean)) # Homework 4 for the University of Tulsa' s CS-7863 Network Theory Course # Degree Distribution # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 library(igraph) library(igraphdata) data(yeast) g <- yeast g.netname <- "Yeast" ################# Set up Work ################# g.vec <- degree(g) g.hist <- hist(g.vec, freq=FALSE, main=paste("Histogram of the", g.netname, " Network")) legend("topright", c("Guess", "Poisson", "Least-Squares Fit", "Max Log-Likelihood"), lty=c(1,2,3,4), col=c("#40B0A6", "#006CD1", "#E66100", "#D35FB7")) g.mean <- mean(g.vec) g.seq <- 0:max(g.vec) # x-axis ################# Guessing Alpha ################# alpha.guess <- 1.5 lines(g.seq, g.seq^(-alpha.guess), col="#40B0A6", lty=1, lwd=3) ################# Poisson ################# g.pois <- dpois(g.seq, g.mean, log=F) lines(g.seq, g.pois, col="#006CD1", lty=2, lwd=3) ################# Linear model: Least-Squares Fit ################# g.breaks <- g.hist$breaks[-c(1)] # remove 0 g.probs <- g.hist$density[-1] # make lengths match # Need to clean up probabilities that are 0 nz.probs.mask <- g.probs!=0 g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] plot(log(g.breaks.clean), log(g.probs.clean)) g.fit <- lm(log(g.probs.clean)~log(g.breaks.clean)) summary(g.fit) alpha.LM <- coef(g.fit)[2] lines(g.seq, g.seq^(-alpha.LM), col="#E66100", lty=3, lwd=3) ################# Max-Log-Likelihood ################# n <- length(g.breaks.clean) kmin <- g.breaks.clean[1] alpha.ML <- 1 + n/sum(log(g.breaks.clean/kmin)) alpha.ML lines(g.seq, g.seq^(-alpha.ML), col="#D35FB7", lty=4, lwd=3) plot(log(g.breaks.clean), log(g.probs.clean)) g.breaks.clean <- g.breaks[nz.probs.mask] g.probs.clean <- g.probs[nz.probs.mask] plot(log(g.breaks.clean), log(g.probs.clean)) BiocManager::install() simDats <- createSimulation2(data.type = "discrete", avg.maf = 0.2, sim.type = "mainEffect", pct.train = 0.5, pct.holdout = 0.5, pct.validation = 0, main.bias = 0.4, pct.signals = 0.2) # npdro::createSimulation2() example for gwas simulation library(npdro) # npdro::createSimulation2() example for gwas simulation if (!require("npdro")) install.packages("npdro") library(npdro) # npdro::createSimulation2() example for gwas simulation install_github("insilico/npdro") # npdro::createSimulation2() example for gwas simulation if (!require("devtools")) install.packages("devtools") library(devtools) install_github("insilico/npdro") # npdro::createSimulation2() example for gwas simulation if (!require("devtools")) install.packages("devtools") ?createSimulation2 # npdro::createSimulation2() example for gwas simulation if (!require("devtools")) install.packages("devtools") library(devtools) install_github("insilico/npdro") library(npdro) ## Set Working Directory to file directory - RStudio approach setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) gen.gwas = function(samples, vars, batch_size=1000){ for (x in 1:ceiling(samples/batch_size)){ curr_batch <- samples - (batch_size*(x-1)) batch_gen <- ifelse(curr_batch > batch_size, batch_size, curr_batch) data <- createSimulation2(data.type = "discrete", avg.maf = 0.2, sim.type = "mainEffect", pct.train = 1.0, pct.imbalance=round(runif(n = 1, min = 0.1, max = 0.9),2), main.bias = 0.4, pct.signals = 0.2, num.samples = batch_gen, num.variables = vars) rownames(data$train) <- as.integer(rownames(data$train))+ (batch_size*(x-1)) if (x == 1){ write.table(data$train, "artif_gwas.csv", row.names = TRUE) } else{ write.table(data$train, "artif_gwas.csv", row.names = TRUE, append = TRUE, col.names = FALSE) } } return(data$train) } ?createSimulation2