diff --git a/.~lock.lab3_expression.docx# b/.~lock.lab3_expression.docx#
new file mode 100644
index 0000000..327da75
--- /dev/null
+++ b/.~lock.lab3_expression.docx#
@@ -0,0 +1 @@
+,noah,NovaArchSys,22.09.2022 16:59,file:///home/noah/.config/libreoffice/4;
\ No newline at end of file
diff --git a/Schrick-Noah_CS-6643_Lab-3.R b/Schrick-Noah_CS-6643_Lab-3.R
new file mode 100644
index 0000000..db8d9f7
--- /dev/null
+++ b/Schrick-Noah_CS-6643_Lab-3.R
@@ -0,0 +1,57 @@
+# Lab 3 for the University of Tulsa's CS-6643 Bioinformatics Course
+# Expression Exploratory Analysis
+# Professor: Dr. McKinney, Fall 2022
+# Noah L. Schrick - 1492657
+
+## Set Working Directory to file directory - RStudio approach
+# Only done inside a running RStudio session; run any other way (e.g. Rscript)
+# the current working directory is kept, so the data files must be reachable
+# from wherever the script was started.
+if (requireNamespace("rstudioapi", quietly = TRUE) &&
+    rstudioapi::isAvailable()) {
+  script.path <- rstudioapi::getActiveDocumentContext()$path
+  # skip when the document has no path yet (nothing to derive a directory from)
+  if (nzchar(script.path)) {
+    setwd(dirname(script.path))
+  }
+}
+
+#### Part A: Loading Data
+## 1: Loading Gene Expression Data
+# load() restores the objects saved in the file; sense.filtered.cpm is used below
+load("sense.filtered.cpm.Rdata")
+dim(sense.filtered.cpm)
+colnames(sense.filtered.cpm)
+
+## 2: Demographic Data
+# Loading
+subject.attrs <- read.csv("Demographic_symptom.csv", stringsAsFactors = FALSE)
+dim(subject.attrs) # 160 subjects x 40 attributes
+colnames(subject.attrs) # interested in X (sample ids) and Diag (diagnosis)
+subject.attrs$X
+subject.attrs$Diag
+
+# Matching gene expression samples with their diagnosis
+# requireNamespace() only checks that dplyr is installed; library() attaches it
+if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
+library(dplyr)
+# create a phenotype vector
+# grab X (subject ids) and Diag (Diagnosis) from subject.attrs that
+# intersect %in% with the RNA-Seq data, then order the rows like the
+# expression columns so element i of the phenotype belongs to column i
+expr.ids <- colnames(sense.filtered.cpm)
+phenos.df <- subject.attrs %>%
+  filter(X %in% expr.ids) %>%
+  arrange(match(X, expr.ids)) %>%
+  dplyr::select(X, Diag)
+colnames(phenos.df) # $Diag is mdd diagnosis
+# flag expression samples without exactly one matching demographic row
+if (!identical(as.character(phenos.df$X), expr.ids)) {
+  warning("phenotype rows do not match expression columns one-to-one",
+          call. = FALSE)
+}
+# grab Diag column and convert character to factor
+mddPheno <- as.factor(phenos.df$Diag) # this is our phenotype/class vector
+
+summary(mddPheno) # MDD -- major depressive disorder, HC -- healthy control
+
diff --git a/lab3_expression.docx b/lab3_expression.docx
index 8d4dade..6f4bf6d 100644
Binary files a/lab3_expression.docx and b/lab3_expression.docx differ