Added all functions. Results not correct.

BuildTools 2021-09-21 16:39:47 -05:00
parent b6d03c3a7f
commit da54e609f8
2 changed files with 298 additions and 30 deletions

exact_inference Submodule (1 line changed)

@@ -0,0 +1 @@
+Subproject commit 9e3e13fe6b2b982a12a5a3f391f91107e95f13b8

main.py (319 lines changed)

@@ -6,37 +6,64 @@
import json
import random
import math
import sys
import os
#pwd = os.getcwd()
#sys.path.append(pwd +'/gen-bn/')
#sys.path.append("./gen-bn")
import numpy as np
from shutil import copyfile
import gen_bn.gen_bn
#from collections import defaultdict
NETWORK_SIZE = 20
NETWORK_TYPE = "polytree"
NUM_SAMPLES = 200
ALPHA = 0.75
def main():
#Generate a new BN. Specify type and number of nodes in network
gen_json("dag", 5)
print("Generating a Bayesian Network of type", NETWORK_TYPE, "and with", NETWORK_SIZE, "nodes.")
gen_json(NETWORK_TYPE, NETWORK_SIZE)
#Get our BN
bayes_net = import_bayes()
#Generate random evidence
E = gen_ev(bayes_net)
print("Generating random evidence:", E)
#Generate a random query that is not an evidence variable in the form of {var : val}
X = gen_query(E, bayes_net)
-#Get W from LW
-W = likelihood_weighting(X, E, bayes_net, 10)
-#Print if desired
-print()
-for key, value in W.items():
-print(key, ' : ', value)
+print("Generating random query:", X)
#Get probability of query from LW given evidence
LW_prob = likelihood_weighting(X, E, bayes_net, NUM_SAMPLES)
print("Probability of", X, "given", E, "with the LW algorithm and", NUM_SAMPLES, "samples is:")
print(LW_prob)
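#Get probability of query from GS given evidence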
GS_prob, tmp = gibbs_sampling(X, E, bayes_net, NUM_SAMPLES)
print("Probability of", X, "given", E, "with the GS algorithm and", NUM_SAMPLES, "samples is:")
print(GS_prob)
#Get probability of query from MH given evidence
MH_prob = metropolis_hastings(X, E, bayes_net, NUM_SAMPLES, ALPHA)
print("Probability of", X, "given", E, "with the MH algorithm and", NUM_SAMPLES, "samples, and a", ALPHA*100, "/", 100-(ALPHA*100), "split is:")
print(MH_prob)
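#Extract the query variable and run exact inference on it for comparison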
query_var = (list(X.items())[0][0])
run_exact(query_var)
def run_exact(query):
#Get dir of exact_inference
os.chdir("./exact_inference")
dirname = os.getcwd()
#Go back up and into the gen_bn folder
os.chdir("..")
os.chdir("./gen_bn")
#copy the file to the exact_inference folder
copyfile("bn.json", dirname+'/bn.json')
os.chdir("../exact_inference")
#Run the exact_inference on the query variable
os.system('python exact_inference.py -f bn.json' + ' ' + '-q' + ' ' + str(query))
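#A minimal alternative sketch using subprocess instead of os.system (assumes the
#same exact_inference.py CLI and working directory as run_exact above); it avoids
#building a shell string and is not called anywhere.
def run_exact_subprocess(query):
    import subprocess
    subprocess.run(["python", "exact_inference.py", "-f", "bn.json", "-q", str(query)], check=False)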
#Generate a new BN.
#Input: Type ("dag", or "polytree")
@@ -57,11 +84,11 @@ def import_bayes():
def gen_ev(bayes_net):
total_nodes = len(bayes_net)
#Arbitrarily, let's only generate total_nodes/2 (rounded up) evidence variables at most, but at least 1
-num_ev = random.randint(0, int(math.ceil(total_nodes/2)))
+num_ev = random.randint(1, int(math.ceil(total_nodes/2)))
fixed_ev = []
#Go through and generate nodes that will be fixed
for i in range(num_ev):
-fixed_var = random.randint(1, total_nodes-1)
+fixed_var = random.randint(0, total_nodes-1)
if fixed_var not in fixed_ev:
fixed_ev.append(fixed_var)
#Now generate random values for the ev
@@ -82,7 +109,7 @@ def gen_query(ev, bayes_net):
for e in ev_vars:
if int(e) in possible_vars:
possible_vars.remove(int(e))
-query = random.choice(possible_vars)
+query = str(random.choice(possible_vars))
rand_prob = random.random()
if rand_prob >= 0.5:
val = True
@@ -91,7 +118,7 @@ def gen_query(ev, bayes_net):
return {query : val}
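#For reference, the returned query has the form {"3": True}: the chosen
#variable's id as a string, mapped to a uniformly random boolean.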
-#Checks if node has parents
+#Checks if node has parents. If not, it is a root
def is_root(node, BN):
return (BN[node]["parents"]) == []
@@ -115,7 +142,7 @@ def get_parents(node, BN):
#E is a dict in the form of {"Node" : Value}
#Compute the estimate of P(X|e), where X is the query variable, and e is the observed value for variables E
-def likelihood_weighting(X, e, bayes_net, num_samples):
+def likelihood_weighting(X, e, bayes_net, num_samples, MH=0):
W = {}
for i in range(num_samples):
@@ -166,12 +193,18 @@ def likelihood_weighting(X, e, bayes_net, num_samples):
#If the sample wasn't already in W, put it in there now
if not written:
W[len(W)] = {w : samples}
#Used by the MH algorithm: return the raw weight dict instead of a probability
if MH != 0:
return W
prob = compute_prob(X, e, W)
return prob
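#For reference, the value returned here is the standard LW estimate:
#P(X=x | e) ~= (total weight of samples consistent with both X=x and e)
#            / (total weight of samples consistent with e),
#which compute_prob below evaluates from the weight dict W.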
#Return the probability of a node and the value dict, given the current evidence and fixed evidence
#Uses recursion to pull probabilities and values down through the network
-def get_probability(node, samples, ev, bayes_net, w):
+def get_probability(node, samples, ev, bayes_net, w, G=0):
parents = get_parents(node, bayes_net)
for parent in parents:
#If we already know the value of the parent, no need to reobtain
@@ -192,11 +225,15 @@ def get_probability(node, samples, ev, bayes_net, w):
samples, prob, w = translate_ev(gparents, parent, ev, samples, bayes_net, w)
#Now that we have all the parents' values, we can get the node value, probability, and update samples
-samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w)
+samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w, G)
return samples, prob, w
#Given a node and its parents, determine the node's value and its probability
-def translate_ev(parents, node, ev, samples, bayes_net, w):
+def translate_ev(parents, node, ev, samples, bayes_net, w, G=0):
#If G=1, the state space has already been set to specific values.
#That means we only need to compute the probability of that assignment, NOT set values.
#Call a different function in that case.
#Sort in ascending order
parents.sort()
node = str(node)
@@ -228,12 +265,242 @@ def translate_ev(parents, node, ev, samples, bayes_net, w):
return samples, table_prob, w
def get_children(node, BN):
children = []
for x in range(len(BN)):
if node in BN[str(x)]["parents"]:
children.append(str(x))
return children
-def gibbs_sampling():
-print("Hello")
def product(nums):
result = 1
for num in nums:
result *= num
return result
-def metropolis_hastings():
-print("Hello")
#Given a node and state_space values, get prob from CPT
def get_cpt(node, samples, bayes_net):
parents = bayes_net[str(node)]["parents"]
parents.sort()
value_list = []
for parent in parents:
value = samples[str(parent)]
value_list.append(value)
for i in range(2**len(parents)):
#If the truth table matches the value combination we have
if bayes_net[str(node)]["prob"][i][0] == value_list:
return bayes_net[str(node)]["prob"][i][1]
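#Worked example (hypothetical CPT layout matching the JSON shape assumed above):
#if bayes_net["2"]["parents"] == ["0", "1"] and
#bayes_net["2"]["prob"] == [[[False, False], 0.1], [[False, True], 0.5],
#                           [[True, False], 0.7], [[True, True], 0.9]],
#then get_cpt("2", {"0": True, "1": False}, bayes_net) returns 0.7.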
def mb(x_node, e, bayes_net, state_space):
#x_node = (list(X.items())[0][0])
#print("NODE IS", x_node)
state_space[x_node] = True
#print("True SS")
#print(state_space)
x_true = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
#print(x_true)
state_space[x_node] = False
#print("False SS")
#print(state_space)
x_false = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
#print(x_false)
#print()
return x_true, x_false
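#Note: mb() returns the two unnormalized Markov blanket scores, so callers
#should normalize before sampling, e.g. p_true = x_true / (x_true + x_false).
#It also leaves state_space[x_node] set to False as a side effect.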
#Given a query, the evidence, and a dict of samples, compute the prob of the query
def compute_prob(X, e, W):
#Combine the query and ev dicts for easier computation
combined_dict = {**X, **e}
#Initialize the probability of the query
query_prob = 0
ev_prob = 0
#print(W)
#Go through all the samples
for i in range(len(W)):
#Easy way to find all the instances where query var and ev match the samples - use subsets
#Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
#Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
if combined_dict.items() <= list(W[i].values())[0].items():
#Get the weight
query_prob+=list(W[i].items())[0][0]
#See if evidence is a subset
if e.items() <= list(W[i].values())[0].items():
ev_prob+=list(W[i].items())[0][0]
if(ev_prob==0):
return 0
else:
return (query_prob/ev_prob)
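#Subset-trick example: {"3": True}.items() <= {"3": True, "1": False}.items()
#evaluates to True in Python 3, so a stored sample matches the query/evidence
#exactly when this dict-items comparison holds.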
def gibbs_sampling(X, e, bayes_net, num_samples):
#State of the network, init'd with the query and ev
#x = {**X, **e}
x = {**e}
#Counts for each value of X
C = {}
for j in range(len(bayes_net)):
C[str(j)] = 0
#Get a list of non-ev variables to make things easier
Z = []
for node in bayes_net:
#if node not in e and node not in X:
if node not in e:
Z.append(node)
#Initialize the rest of the network randomly
for z in Z:
rand_prob = random.random()
if rand_prob >=0.5:
x[z] = True
else:
x[z] = False
for k in range(num_samples):
zi = random.choice(Z)
#Reuse the get_prob function, even though we don't care about maintaining a samples list or weight
#samples, prob, w = get_probability(str(zi), {}, e, bayes_net, 0)
z_true, z_false = mb(str(zi), e, bayes_net, x)
#Normalize the two Markov blanket scores into P(zi = True | markov blanket)
p_true = z_true / (z_true + z_false)
#Gen a random number to determine the value based on that probability
value_prob = random.random()
if value_prob < p_true:
x[zi] = True
C[zi]+=1
else:
x[zi] = False
for j in range(len(bayes_net)):
C[str(j)]/=num_samples
query_var = (list(X.items())[0][0])
query_val = (list(X.values())[0])
#print(C)
if(query_val):
GS_Prob = C[str(query_var)]
else:
GS_Prob = 1.00000 - C[str(query_var)]
return GS_Prob, x
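#Note on the counting scheme: C[zi] is incremented only on iterations where zi
#is the variable being resampled and is set to True; a common alternative is to
#tally the query variable's current state on every iteration.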
#Use log-probabilities to prevent underflow in MH, as advised by Dr. Sen
def compute_log(prob):
return -0.5 * np.sum(prob ** 2)
def acceptance(xprime, x, compute_log):
return min(1, np.exp(compute_log(xprime) - compute_log(x)))
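#Note: compute_log above is the Gaussian log-density form seen in many MH
#tutorials; for the discrete probabilities used in this network, a log-space
#acceptance ratio would instead be, e.g., min(1, np.exp(np.log(p_new) - np.log(p_old))).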
def metropolis_hastings(X, e, bayes_net, num_samples, alpha):
state_space = {**e}
Z = []
#Counts for each value of X
W = {}
C = {}
for j in range(len(bayes_net)):
C[str(j)] = 0
for node in bayes_net:
#if node not in e and node not in X:
if node not in e:
Z.append(node)
#Initialize the rest of the network randomly
for z in Z:
rand_prob = random.random()
if rand_prob >=0.5:
state_space[z] = True
else:
state_space[z] = False
#3 runs: with alpha=0.95, 95% of proposals run Gibbs for x' and 5% draw a weighted sample; the other runs use 85/15 and 75/25 splits
#Acceptance is 100% here
for num in range(num_samples):
proposal_choice = random.random()
#Weighted-Sample
if(proposal_choice <= (1-alpha)):
tmp = likelihood_weighting(X, e, bayes_net, 1, 1)
sample = list(tmp[0].values())[0]
#If this sample is already in W, don't add a new sample - only adjust the weight
written = False
for k in range(len(W)):
#If sample is in W
if sample in W[k].values():
#Pull the weight that's associated with the sample
key = list(W[k].items())[0][0]
#Increment count
new_key = key + 1
#Store it all back into W
W[k] = {new_key : sample}
#Make note that we've already written to W in this loop, so we don't write it again
written = True
#If the sample wasn't already in W, put it in there now
if not written:
W[len(W)] = {1 : sample}
#for key, value in W.items():
# print(key, ' : ', value)
#zi = random.choice(Z)
#state_space, prob, w = get_probability(zi, state_space, bayes_net, 0)
#Gibbs
else:
zi = random.choice(Z)
#Reuse the get_prob function, even though we don't care about maintaining a samples list or weight
#samples, prob, w = get_probability(str(zi), {}, e, bayes_net, 0)
z_true, z_false = mb(str(zi), e, bayes_net, state_space)
#Normalize the two Markov blanket scores into P(zi = True | markov blanket)
p_true = z_true / (z_true + z_false)
#Gen a random number to determine the value based on that probability
value_prob = random.random()
if value_prob < p_true:
state_space[zi] = True
C[zi]+=1
else:
state_space[zi] = False
#If this sample is already in W, don't add a new sample - only adjust the weight
written = False
for tmp in range(len(W)):
#If sample is in W
if state_space in W[tmp].values():
#Pull the weight that's associated with the sample
key = list(W[tmp].items())[0][0]
#Increment count
new_key = key + 1
#Store a copy back into W so later mutations of state_space don't alter it
W[tmp] = {new_key : dict(state_space)}
#Make note that we've already written to W in this loop, so we don't write it again
written = True
#If the sample wasn't already in W, put it in there now
if not written:
W[len(W)] = {1 : dict(state_space)}
#for key, value in W.items():
# print(key, ' : ', value)
#Combine the query and ev dicts for easier computation
combined_dict = {**X, **e}
#Initialize the probability of the query
query_prob = 0
#Go through all the samples
for i in range(len(W)):
#Easy way to find all the instances where query var and ev match the samples - use subsets
#Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
#Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
if combined_dict.items() <= list(W[i].values())[0].items():
#Get the weight
query_prob+=list(W[i].items())[0][0]
return (query_prob/num_samples)
#MH_prob = compute_prob(X, e, W)
#return MH_prob
if __name__ == '__main__':
main()