# CS-7863-Sci-Stat-Proj-6/Schrick-Noah_Simulated-Data.R
#
# 55 lines
# 2.3 KiB
# R

# Dependencies ----
# Each package is installed only when it is missing, so that re-running the
# script does not need network access. requireNamespace() checks for an
# installed package without attaching it; library() then attaches it and
# fails loudly if the install did not succeed.

# devtools provides install_github(), used below to install npdro.
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
library(devtools)

# npdro is installed from its GitHub repository (insilico/npdro).
if (!requireNamespace("npdro", quietly = TRUE)) install_github("insilico/npdro")
library(npdro)

if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)
#' Simulate a data set with npdro and split the training outcome from predictors
#'
#' Thin wrapper around `npdro::createSimulation2()`. Every argument is
#' forwarded to that function unchanged under the same name; see the npdro
#' documentation for their exact meaning. The wrapper then pulls the
#' train / holdout / validation pieces out of the result and separates the
#' outcome column of the training set from its predictor columns.
#'
#' @param num.samples,num.variables,pct.imbalance,pct.signals,main.bias,
#'   interaction.bias,hi.cor,lo.cor,mix.type,sim.type,pct.mixed,pct.train,
#'   pct.holdout,pct.validation,plot.graph,graph.structure,verbose
#'   Passed through to `npdro::createSimulation2()`.
#' @param label Name of the outcome column. Passed through to
#'   `npdro::createSimulation2()` and also used here to locate the outcome
#'   column in the training set (assumes the simulator names the outcome
#'   column after `label` -- TODO confirm against the npdro documentation).
#'
#' @return A named list with elements
#'   * `train`: `dataset$train` as returned by the simulator (the original
#'     author notes 150 x 101 for the default arguments),
#'   * `test`: `dataset$holdout`,
#'   * `train.X`: `train` without the `label` column,
#'   * `train.y`: the `label` column of `train`,
#'   * `validation`: `dataset$validation`,
#'   * `train.y.01`: `as.numeric(train.y) - 1`.
create_data <- function(num.samples = 300, num.variables = 100,
                        pct.imbalance = 0.5, pct.signals = 0.2,
                        main.bias = 0.5, interaction.bias = 1,
                        hi.cor = 0.95, lo.cor = 0.2,
                        mix.type = "main-interactionScalefree",
                        label = "class", sim.type = "mixed",
                        pct.mixed = 0.5, pct.train = 0.5,
                        pct.holdout = 0.5, pct.validation = 0,
                        plot.graph = FALSE, graph.structure = NULL,
                        verbose = TRUE) {
  dataset <- npdro::createSimulation2(
    num.samples = num.samples,
    num.variables = num.variables,
    pct.imbalance = pct.imbalance,
    pct.signals = pct.signals,
    main.bias = main.bias,
    interaction.bias = interaction.bias,
    hi.cor = hi.cor,
    lo.cor = lo.cor,
    mix.type = mix.type,
    label = label,
    sim.type = sim.type,
    pct.mixed = pct.mixed,
    pct.train = pct.train,
    pct.holdout = pct.holdout,
    pct.validation = pct.validation,
    plot.graph = plot.graph,
    graph.structure = graph.structure,
    verbose = verbose
  )

  train <- dataset$train
  test <- dataset$holdout
  validation <- dataset$validation

  # Split on `label` rather than a hard-coded "class": with a non-default
  # label the old negative-`which()` index was empty and silently selected
  # zero predictor columns.
  if (!(label %in% colnames(train))) {
    stop("Outcome column '", label, "' not found in the simulated training ",
         "data.", call. = FALSE)
  }

  # Separate the outcome vector from the predictor data.
  # drop = FALSE keeps train.X two-dimensional even with a single predictor.
  train.X <- train[, colnames(train) != label, drop = FALSE]
  train.y <- train[, label]

  # For a two-level factor, as.numeric() yields the level codes 1/2, so
  # subtracting 1 gives a 0/1 coding.
  # NOTE(review): assumes the outcome is a factor -- confirm.
  train.y.01 <- as.numeric(train.y) - 1

  list(train = train, test = test, train.X = train.X, train.y = train.y,
       validation = validation, train.y.01 = train.y.01)
}