# Network-Clustering/mutual_information.R

entropy <- function(x) {
  # Shannon entropy (in bits) of a discrete state vector x
  n <- length(x)
  unique_states <- unique(x)
  # initialize the probability of each unique state to 0
  # (e.g., two elements for p(x=0) and p(x=1) in the binary case)
  p <- rep(0, length(unique_states))
  # estimate the probability of each unique state
  i <- 1 # R starts counting arrays at 1!
  for (curr_state in unique_states) {
    p[i] <- sum(x == curr_state) / n # relative frequency of this state
    i <- i + 1
  }
  H <- sum(-p * log2(p))
  return(H)
}
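
# Quick sanity check (illustrative values, not part of the original script):
# a fair binary variable carries exactly 1 bit, a constant vector 0 bits.
entropy(c(0, 1, 0, 1)) # 1
entropy(c(1, 1, 1, 1)) # 0
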
mutualInformation <- function(X, Y) {
  # X and Y are vectors of discrete states
  n <- length(X)
  X.unique <- unique(X) # unique states in X
  Y.unique <- unique(Y) # unique states in Y
  # Create the joint states element-wise over the full X and Y vectors.
  # This must pair every observation, not just the unique states --
  # this is where I messed up originally:
  # xy.joint <- interaction(X.states, Y.states, sep=":")
  xy.joint <- interaction(X, Y, sep = ":")
  mu <- 0 # initialize the mutual information
  for (x in X.unique) {
    for (y in Y.unique) {
      # sum over all pairs of unique x and y states
      # (e.g., 4 pairs for two binary variables)
      px <- sum(x == X) / n # marginal probability of state x
      py <- sum(y == Y) / n # marginal probability of state y
      xy <- paste(x, y, sep = ":") # label for the observed pair
      pxy <- sum(xy == xy.joint) / n # joint probability of the pair
      if (pxy > 0) { # unobserved pairs contribute 0 (0*log 0 = 0 by convention)
        mu <- mu + pxy * log2(pxy / (px * py))
      }
    }
  }
  return(mu)
}
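
# Sanity check (added for illustration): the mutual information of a
# variable with itself should equal its entropy, I(X;X) = H(X).
x.test <- c(0, 1, 0, 1)
mutualInformation(x.test, x.test) # 1, matching entropy(x.test)
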
normalizedMI <- function(X, Y) {
  # normalized mutual information, 2*I(X;Y)/(H(X)+H(Y)), ranges over [0, 1]
  mu <- mutualInformation(X, Y)
  Hx <- entropy(X)
  Hy <- entropy(Y)
  nmi <- 2 * mu / (Hx + Hy) # NaN if both X and Y are constant (Hx + Hy = 0)
  return(nmi)
}
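
# Illustrative check (x.test2 is a made-up example vector): NMI ignores how
# states are labeled, so a deterministic recoding like y = 1 - x gives NMI = 1.
x.test2 <- c(0, 1, 0, 1)
normalizedMI(x.test2, 1 - x.test2) # 1
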
# Example X and Y vectors
n <- 10 # number of observations
p0 <- .5 # probability of the first state
# Roughly 50:50 1's and 0's
x1 <- sample(c(0, 1), size = n,
             replace = TRUE, prob = c(p0, 1 - p0))
# c(0,1) -- possible state values; could also be c("a","b")
# size=n -- number of random samples of the state levels,
#   i.e., the number of observations (length of the output)
# replace=TRUE -- sample with replacement,
#   so we don't run out of states when sampling
# prob=c(p0,1-p0) -- probability of sampling each state
y1 <- rep(1, n) # all 1's
interaction(x1, y1, sep = ":")
unique(y1)
entropy(x1)
mutualInformation(x1, y1)
mutualInformation(x1, x1)
normalizedMI(x1, y1) # 0: y1 is constant (H(y1) = 0), so it shares no information with x1
normalizedMI(x1, x1) # should be 1 because normalized: a variable shares all its information with itself
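
# Extra demo (hypothetical extension of the example above): a noisy copy of
# x1 should fall between the two extremes, typically 0 < NMI < 1.
flip <- sample(c(0, 1), size = n, replace = TRUE, prob = c(0.8, 0.2))
y2 <- (x1 + flip) %% 2 # flips roughly 20% of the bits in x1
normalizedMI(x1, y2)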