# File info (from code viewer): 32 lines, 1.1 KiB, R
# Lab 4 for the University of Tulsa's CS-6643 Bioinformatics Course
# Differential Expression
# Professor: Dr. McKinney, Fall 2022
# Noah L. Schrick - 1492657

## Set Working Directory to file directory - RStudio approach
# Only attempted when running inside RStudio with a saved document; otherwise
# the current working directory is left untouched so the script can still be
# run with Rscript/source() from the data directory.
local({
  in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  if (in_rstudio) {
    script_path <- rstudioapi::getActiveDocumentContext()$path
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  }
})

#### Part A: Preparing Data
# Loads the objects stored in the .Rdata file; the rest of the script expects
# this to define `sense.filtered.cpm` (genes in rows, subjects in columns).
load("sense.filtered.cpm.Rdata")
# load phenotype (mdd/hc) data
subject.attrs <- read.csv("Demographic_symptom.csv",
                          stringsAsFactors = FALSE)

# Install dplyr on first use, then attach it.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)

# grab intersecting X (subject ids) and Diag (Diagnosis) from columns
# The rows are put in the same order as the expression-matrix columns, because
# the diagnoses are later attached to those columns by position.
sample.ids <- colnames(sense.filtered.cpm)
phenos.df <- subject.attrs %>%
  filter(X %in% sample.ids) %>%
  arrange(match(X, sample.ids)) %>%
  dplyr::select(X, Diag)

# Every expression column must have exactly one phenotype row, in order.
if (!identical(as.character(phenos.df$X), sample.ids)) {
  stop(
    "Subject ids in Demographic_symptom.csv do not match the columns of ",
    "sense.filtered.cpm one-to-one (missing or duplicated ids).",
    call. = FALSE
  )
}
mddPheno <- as.factor(phenos.df$Diag)

# Normalize and transform
library(preprocessCore)
# normalize.quantiles() requires a matrix; as.matrix() is a no-op if it is one.
mddExprData_quantile <- normalize.quantiles(as.matrix(sense.filtered.cpm))
mddExprData_quantileLog2 <- log2(mddExprData_quantile)
# attach phenotype names and gene names to data
# (normalize.quantiles() returns a matrix without dimnames)
colnames(mddExprData_quantileLog2) <- as.character(mddPheno)
rownames(mddExprData_quantileLog2) <- rownames(sense.filtered.cpm)

# Number of row (gene) names in the expression data
length(rownames(sense.filtered.cpm))

#### Part B: Filter noise genes