entropy <- function(x){
  n <- length(x)
  unique_states <- unique(x)
  # initialize the prob of each state to 0
  # two elements for p(x=0) and p(x=1)
  p <- rep(0, length(unique_states))
  # update the elements of p
  # curr_state = 0 or 1
  i <- 1 # R starts counting arrays at 1!
  for (curr_state in unique_states){
    p[i] <- sum(x == curr_state)/n # prob of a unique state
    i <- i + 1
  }
  H <- sum(-p*log2(p))
  return(H)
}

mutualInformation <- function(X, Y){
  # X and Y are vectors of states
  n <- length(X)
  X.unique <- unique(X) # unique states in X
  Y.unique <- unique(Y) # unique states in Y
  # create joint states between the X and Y vectors
  # This needs to be for all pairs in the X and Y vecs
  # (this is where I messed up):
  # xy.joint <- interaction(X.states, Y.states, sep=":")
  xy.joint <- interaction(X, Y, sep=":")
  mu <- 0 # initialize mutual information to 0
  for (x in X.unique){
    for (y in Y.unique){
      # sum over all pairs of unique x and y states
      # (for example, 4 pairs for two binary variables)
      px <- sum(x == X)/n # marginal prob of state x
      py <- sum(y == Y)/n # marginal prob of state y
      xy <- paste(x, y, sep=":") # the observed pair value
      pxy <- sum(xy == xy.joint)/n # joint prob of the pair
      if (pxy > 0){
        mu <- mu + pxy*log2(pxy/(px*py))
      }
    }
  }
  return(mu)
}

normalizedMI <- function(X, Y){
  mu <- mutualInformation(X, Y)
  Hx <- entropy(X)
  Hy <- entropy(Y)
  nmi <- 2*mu/(Hx + Hy)
  return(nmi)
}

# Example X and Y vectors
n <- 10   # number of observations
p0 <- .5  # prob of the first state

# Roughly 50:50 1's and 0's
x1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(p0, 1-p0))
# c(0,1)          -- possible state values (could also be c("a","b"))
# size=n          -- number of random samples of the state levels,
#                    i.e. the number of observations / length of output
# replace=TRUE    -- sample with replacement,
#                    so we don't run out of states while sampling
# prob=c(p0,1-p0) -- probability of sampling each state

y1 <- rep(1, n) # all 1's

interaction(x1, y1, sep=":")
unique(y1)

entropy(x1)
mutualInformation(x1, y1)
mutualInformation(x1, x1)
normalizedMI(x1, y1) # 0 (or very close to 0), since y1 has no entropy
normalizedMI(x1, x1) # should be 1 because it is normalized
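
# Optional sanity checks (a quick sketch; the seed and sample size below
# are arbitrary choices for illustration, not part of the example above):
# - for a long fair-coin sequence, entropy should approach 1 bit
# - I(X;X) = H(X), so normalizedMI(x, x) should come out as 1
# - if both inputs are constant, Hx + Hy is 0 and normalizedMI returns
#   NaN (0/0), which is worth keeping in mind
set.seed(1)
x.check <- sample(c(0,1), size=10000, replace=TRUE)
entropy(x.check)                    # close to 1 bit
mutualInformation(x.check, x.check) # equals entropy(x.check)
normalizedMI(x.check, x.check)      # 1 (up to floating point)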