38 lines
1.3 KiB
R
38 lines
1.3 KiB
R
# Lab 3 for the University of Tulsa's CS-6643 Bioinformatics Course
# Expression Exploratory Analysis
# Professor: Dr. McKinney, Fall 2022
# Noah L. Schrick - 1492657

## Set Working Directory to file directory - RStudio approach
# The data files in this script are referenced by relative path, so the
# working directory is pointed at the folder holding this script. This is
# only attempted when rstudioapi is installed, RStudio is running, and the
# active document has been saved (an unsaved document reports an empty
# path); otherwise the current working directory is left unchanged instead
# of aborting the script.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
  rstudioapi::isAvailable()) {
  script.path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script.path)) {
    setwd(dirname(script.path))
  }
}

#### Part A: Loading Data
## 1: Loading Gene Expression Data
# load() restores whatever objects were saved in the .Rdata file into the
# current environment. The rest of the script reads `sense.filtered.cpm`,
# so stop here with a clear failure if the file did not provide it.
load("sense.filtered.cpm.Rdata")
stopifnot(exists("sense.filtered.cpm"))
dim(sense.filtered.cpm)
colnames(sense.filtered.cpm) # column names are matched to subject ids later

## 2: Demographic Data
# Loading
subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE)
dim(subject.attrs) # 160 subjects x 40 attributes
colnames(subject.attrs) # interested in X (sample ids) and Diag (diagnosis)

# `$` partially matches column names on a data.frame and silently returns
# NULL when nothing matches, so confirm both columns exist and then extract
# them by exact name.
stopifnot(all(c("X", "Diag") %in% colnames(subject.attrs)))
subject.attrs[["X"]]
subject.attrs[["Diag"]]

# Matching gene expression samples with their diagnosis
# requireNamespace() only checks whether dplyr is installed (without
# attaching it); library() then attaches it and errors if the install failed.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)

# create a phenotype vector
# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
# intersect %in% with the RNA-Seq data
expr.ids <- colnames(sense.filtered.cpm)

# filter() keeps the row order of the CSV, which is not guaranteed to be the
# column order of the expression matrix. arrange(match(...)) reorders the
# rows to follow the expression columns so that phenotype i describes
# expression column i.
phenos.df <- subject.attrs %>%
  filter(X %in% expr.ids) %>%
  dplyr::select(X, Diag) %>%
  arrange(match(X, expr.ids))
colnames(phenos.df) # $Diag is mdd diagnosis

# Expression samples without a demographic row would leave the phenotype
# vector shorter than the number of expression columns; surface that early.
unmatched.ids <- setdiff(expr.ids, phenos.df$X)
if (length(unmatched.ids) > 0) {
  warning(
    length(unmatched.ids),
    " expression sample(s) have no row in the demographic data",
    call. = FALSE
  )
}

# grab Diag column and convert character to factor
mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector

summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control