From da54e609f8c142c27f0601d5863e090080f6ba4d Mon Sep 17 00:00:00 2001
From: BuildTools
Date: Tue, 21 Sep 2021 16:39:47 -0500
Subject: [PATCH] Added all functions. Results not correct.

---
 exact_inference |   1 +
 main.py         | 327 +++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 298 insertions(+), 30 deletions(-)
 create mode 160000 exact_inference

diff --git a/exact_inference b/exact_inference
new file mode 160000
index 0000000..9e3e13f
--- /dev/null
+++ b/exact_inference
@@ -0,0 +1 @@
+Subproject commit 9e3e13fe6b2b982a12a5a3f391f91107e95f13b8
diff --git a/main.py b/main.py
index 1859c00..8aab2db 100644
--- a/main.py
+++ b/main.py
@@ -6,37 +6,64 @@
 import json
 import random
 import math
-import sys
 import os
-
-#pwd = os.getcwd()
-#sys.path.append(pwd +'/gen-bn/')
-#sys.path.append("./gen-bn")
+import numpy as np
+from shutil import copyfile
 import gen_bn.gen_bn
-#from collections import defaultdict

+NETWORK_SIZE = 20
+NETWORK_TYPE = "polytree"
+NUM_SAMPLES = 200
+ALPHA = 0.75

 def main():
     #Generate a new BN. Specify type and number of nodes in network
-    gen_json("dag", 5)
+    print("Generating a Bayesian Network of type", NETWORK_TYPE, "with", NETWORK_SIZE, "nodes.")
+    gen_json(NETWORK_TYPE, NETWORK_SIZE)

     #Get our BN
     bayes_net = import_bayes()

     #Generate random evidence
     E = gen_ev(bayes_net)
-
+    print("Generating random evidence:", E)
+
     #Generate a random query that is not an evidence variable in the form of {var : val}
     X = gen_query(E, bayes_net)
-
-    #Get W from LW
-    W = likelihood_weighting(X, E, bayes_net, 10)
-
-    #Print if desired
+    print("Generating random query:", X)
     print()
-    for key, value in W.items():
-        print(key, ' : ', value)
+
+    #Get probability of query from LW given evidence
+    LW_prob = likelihood_weighting(X, E, bayes_net, NUM_SAMPLES)
+    print("Probability of", X, "given", E, "with the LW algorithm and", NUM_SAMPLES, "samples is:")
+    print(LW_prob)
+
+    #Get probability of query from GS given evidence
+    GS_prob, tmp = gibbs_sampling(X, E, bayes_net, NUM_SAMPLES)
+    print("Probability of", X, "given", E, "with the GS algorithm and", NUM_SAMPLES, "samples is:")
+    print(GS_prob)
+
+    #Get probability of query from MH given evidence
+    MH_prob = metropolis_hastings(X, E, bayes_net, NUM_SAMPLES, ALPHA)
+    print("Probability of", X, "given", E, "with the MH algorithm and", NUM_SAMPLES, "samples, and a", ALPHA*100, "/", 100-(ALPHA*100), "split is:")
+    print(MH_prob)
+
+    query_var = list(X.keys())[0]
+    run_exact(query_var)
+
+def run_exact(query):
+    #Get dir of exact_inference
+    os.chdir("./exact_inference")
+    dirname = os.getcwd()
+    #Go back up and into the gen_bn folder
+    os.chdir("..")
+    os.chdir("./gen_bn")
+    #Copy the generated network to the exact_inference folder
+    copyfile("bn.json", dirname + '/bn.json')
+    os.chdir("../exact_inference")
+    #Run the exact inference on the query variable
+    os.system('python exact_inference.py -f bn.json -q ' + str(query))
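+
+#For reference, an equivalent sketch using subprocess with an explicit working
+#directory instead of chained os.chdir calls (a hypothetical alternative, assuming
+#main.py is launched from the repo root; not called by main()):
+def run_exact_subprocess(query):
+    import subprocess
+    #Copy the generated network next to exact_inference.py, then run it there
+    copyfile("gen_bn/bn.json", "exact_inference/bn.json")
+    subprocess.run(["python", "exact_inference.py", "-f", "bn.json", "-q", str(query)],
+                   cwd="exact_inference")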
#Input: Type ("dag", or "polytree") @@ -57,11 +84,11 @@ def import_bayes(): def gen_ev(bayes_net): total_nodes = len(bayes_net) #Arbitrarily, let's only generate total_nodes/2 (rounded up) evidence variables at most, but at least 1 - num_ev = random.randint(0, int(math.ceil(total_nodes/2))) + num_ev = random.randint(1, int(math.ceil(total_nodes/2))) fixed_ev = [] #Go through and generate nodes that will be fixed for i in range(num_ev): - fixed_var = random.randint(1, total_nodes-1) + fixed_var = random.randint(0, total_nodes-1) if fixed_var not in fixed_ev: fixed_ev.append(fixed_var) #Now generate random values for the ev @@ -82,7 +109,7 @@ def gen_query(ev, bayes_net): for e in ev_vars: if int(e) in possible_vars: possible_vars.remove(int(e)) - query = random.choice(possible_vars) + query = str(random.choice(possible_vars)) rand_prob = random.random() if rand_prob >= 0.5: val = True @@ -91,7 +118,7 @@ def gen_query(ev, bayes_net): return {query : val} -#Checks if node has parents +#Checks if node has parents. If not, it is a root def is_root(node, BN): return (BN[node]["parents"]) == [] @@ -115,7 +142,7 @@ def get_parents(node, BN): #E is a dict in the form of {"Node" : Value} #Compute the estimate of P(X|e), where X is the query variable, and e is the observed value for variables E -def likelihood_weighting(X, e, bayes_net, num_samples): +def likelihood_weighting(X, e, bayes_net, num_samples, MH=0): W = {} for i in range(num_samples): @@ -166,12 +193,18 @@ def likelihood_weighting(X, e, bayes_net, num_samples): #If the sample wasn't already in W, put it in there now if not written: W[len(W)] = {w : samples} - return W - + + #use for MH alg + if(MH is not 0): + return W + + prob = compute_prob(X, e, W) + + return prob #Return the probability of a node and the value dict, given the current evidence and fixed evidence #Uses recursion to pull probabilites and values down through the network -def get_probability(node, samples, ev, bayes_net, w): +def get_probability(node, samples, ev, bayes_net, w, G=0): parents = get_parents(node, bayes_net) for parent in parents: #If we already know the value of the parent, no need to reobtain @@ -192,11 +225,15 @@ def get_probability(node, samples, ev, bayes_net, w): samples, prob, w = translate_ev(gparents, parent, ev, samples, bayes_net, w) #Now that we have all the parents' values, we can get the node value, probability, and update samples - samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w) + samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w, G) return samples, prob, w #Given a node and its parents, determine the node's value and it's probability -def translate_ev(parents, node, ev, samples, bayes_net, w): +def translate_ev(parents, node, ev, samples, bayes_net, w, G=0): + #If G=1, it means we already set the state space to specific values. 

 #Given a node and its parents, determine the node's value and its probability
-def translate_ev(parents, node, ev, samples, bayes_net, w):
+def translate_ev(parents, node, ev, samples, bayes_net, w, G=0):
+    #If G=1, it means we already set the state space to specific values.
+    #Meaning, we only need to compute the prob of that happening, NOT set values.
+    #Call a different function instead.
+
     #Sort in ascending order
     parents.sort()
     node = str(node)
@@ -227,13 +264,243 @@
         w = w * table_prob

     return samples, table_prob, w
-
-def gibbs_sampling():
-    print("Hello")

+def get_children(node, BN):
+    children = []
+    for x in range(len(BN)):
+        if node in BN[str(x)]["parents"]:
+            children.append(str(x))
+    return children

-def metropolis_hastings():
-    print("Hello")
+def product(nums):
+    result = 1
+    for num in nums:
+        result *= num
+    return result
+
+#Given a node and state_space values, get its prob from the CPT
+def get_cpt(node, samples, bayes_net):
+    parents = bayes_net[str(node)]["parents"]
+    parents.sort()
+    value_list = []
+
+    for parent in parents:
+        value = samples[str(parent)]
+        value_list.append(value)
+
+    for i in range(2**len(parents)):
+        #If the truth table row matches the value combination we have
+        if bayes_net[str(node)]["prob"][i][0] == value_list:
+            return bayes_net[str(node)]["prob"][i][1]
+
+#Compute the two unnormalized Markov-blanket scores for x_node:
+#P(x | parents(x)) times the product of P(child | parents(child)) over its children
+def mb(x_node, e, bayes_net, state_space):
+    state_space[x_node] = True
+    x_true = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
+    state_space[x_node] = False
+    x_false = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
+    return x_true, x_false
+
+#Given a query, the evidence, and a dict of samples, compute the prob of the query
+def compute_prob(X, e, W):
+    #Combine the query and ev dicts for easier computation
+    combined_dict = {**X, **e}
+    #Initialize the probability of the query and of the evidence
+    query_prob = 0
+    ev_prob = 0
+    #Go through all the samples
+    for i in range(len(W)):
+        #Easy way to find all the instances where query var and ev match the samples - use subsets
+        #Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
+        #Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
+        if combined_dict.items() <= list(W[i].values())[0].items():
+            #Get the weight
+            query_prob += list(W[i].items())[0][0]
+        #See if evidence is a subset
+        if e.items() <= list(W[i].values())[0].items():
+            ev_prob += list(W[i].items())[0][0]
+
+    if ev_prob == 0:
+        return 0
+    else:
+        return query_prob/ev_prob
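+
+#Sanity-check sketch for mb() on the chain A -> B with B=True observed (made-up
+#CPT numbers, not tied to bn.json; not called anywhere). The normalized Gibbs
+#conditional is score_true / (score_true + score_false), assuming not both are 0:
+def mb_toy_check():
+    score_true = 0.5 * 0.9     #P(A=T) * P(B=T | A=T)
+    score_false = 0.5 * 0.2    #P(A=F) * P(B=T | A=F)
+    return score_true / (score_true + score_false)    #= 0.45/0.55 = 0.818...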
+
+def gibbs_sampling(X, e, bayes_net, num_samples):
+    #State of the network, init'd with the evidence
+    x = {**e}
+
+    #Counts of how often each variable was True across samples
+    C = {}
+    for j in range(len(bayes_net)):
+        C[str(j)] = 0
+    #Get a list of non-ev variables to make things easier
+    Z = []
+    for node in bayes_net:
+        if node not in e:
+            Z.append(node)
+
+    #Initialize the rest of the network randomly
+    for z in Z:
+        rand_prob = random.random()
+        if rand_prob >= 0.5:
+            x[z] = True
+        else:
+            x[z] = False
+
+    for k in range(num_samples):
+        zi = random.choice(Z)
+        z_true, z_false = mb(str(zi), e, bayes_net, x)
+        #Normalize the two Markov-blanket scores, then draw the new value
+        p_true = z_true / (z_true + z_false)
+        value_prob = random.random()
+        if value_prob < p_true:
+            x[zi] = True
+        else:
+            x[zi] = False
+
+        #Count the current state of every variable, not just the one we resampled
+        for j in range(len(bayes_net)):
+            if x[str(j)]:
+                C[str(j)] += 1
+
+    for j in range(len(bayes_net)):
+        C[str(j)] /= num_samples
+
+    query_var = list(X.keys())[0]
+    query_val = list(X.values())[0]
+
+    if query_val:
+        GS_Prob = C[str(query_var)]
+    else:
+        GS_Prob = 1.0 - C[str(query_var)]
+
+    return GS_Prob, x
+
+#Helpers for working in log-probabilities to prevent underflow in MH, as advised by Dr. Sen
+#(not wired into metropolis_hastings yet)
+def compute_log(prob):
+    return -0.5 * np.sum(prob ** 2)
+
+def acceptance(xprime, x, compute_log):
+    return min(1, np.exp(compute_log(xprime) - compute_log(x)))
+
+def metropolis_hastings(X, e, bayes_net, num_samples, alpha):
+    state_space = {**e}
+    Z = []
+
+    #Sample store, keyed by insertion order; each entry maps {count : sample}
+    W = {}
+
+    for node in bayes_net:
+        if node not in e:
+            Z.append(node)
+
+    #Initialize the rest of the network randomly
+    for z in Z:
+        rand_prob = random.random()
+        if rand_prob >= 0.5:
+            state_space[z] = True
+        else:
+            state_space[z] = False
+
+    #Mixture proposal: with probability alpha take a Gibbs step for x', otherwise
+    #draw a weighted sample. Run with e.g. 95/5, 85/15, and 75/25 splits.
+    #Every proposal is accepted (100% acceptance).
+    for num in range(num_samples):
+        proposal_choice = random.random()

+        #Weighted-Sample
+        if proposal_choice <= (1 - alpha):
+            tmp = likelihood_weighting(X, e, bayes_net, 1, 1)
+            #Note: the LW weight itself is discarded here; each draw just counts once
+            sample = list(tmp[0].values())[0]
+
+            #If this sample is already in W, don't add a new sample - only adjust the count
+            written = False
+            for k in range(len(W)):
+                #If sample is in W
+                if sample in W[k].values():
+                    #Pull the count that's associated with the sample
+                    key = list(W[k].items())[0][0]
+                    #Increment count
+                    new_key = key + 1
+                    #Store it all back into W
+                    W[k] = {new_key : sample}
+                    #Make note that we've already written to W in this loop, so we don't write it again
+                    written = True
+
+            #If the sample wasn't already in W, put it in there now
+            if not written:
+                W[len(W)] = {1 : sample}
+
+        #Gibbs
+        else:
+            zi = random.choice(Z)
+            z_true, z_false = mb(str(zi), e, bayes_net, state_space)
+            #Normalize the two Markov-blanket scores, then draw the new value
+            p_true = z_true / (z_true + z_false)
+            value_prob = random.random()
+            if value_prob < p_true:
+                state_space[zi] = True
+            else:
+                state_space[zi] = False
+
+            #If this sample is already in W, don't add a new sample - only adjust the count
+            written = False
+            for k in range(len(W)):
+                #If sample is in W
+                if state_space in W[k].values():
+                    #Pull the count that's associated with the sample
+                    key = list(W[k].items())[0][0]
+                    #Increment count
+                    new_key = key + 1
+                    #Store a snapshot back into W (dict() copies, so later steps don't mutate it)
+                    W[k] = {new_key : dict(state_space)}
+                    #Make note that we've already written to W in this loop, so we don't write it again
+                    written = True
+
+            #If the sample wasn't already in W, put it in there now
+            if not written:
+                W[len(W)] = {1 : dict(state_space)}
+
+    #Combine the query and ev dicts for easier computation
+    combined_dict = {**X, **e}
+    #Initialize the probability of the query
+    query_prob = 0
+    #Go through all the samples
+    for i in range(len(W)):
+        #Easy way to find all the instances where query var and ev match the samples - use subsets
+        #Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
+        #Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
+        if combined_dict.items() <= list(W[i].values())[0].items():
+            #Get the count
+            query_prob += list(W[i].items())[0][0]
+
+    #(compute_prob(X, e, W) would be an alternative estimator here)
+    return query_prob/num_samples
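+
+#A minimal reproducibility sketch (hypothetical helper, not called by main()):
+#seeding the RNG lets LW/GS/MH be compared on the same generated evidence and query.
+#Assumes gen_bn has already written bn.json for import_bayes() to load.
+def demo_seeded_run(seed=0):
+    random.seed(seed)
+    bn = import_bayes()
+    ev = gen_ev(bn)
+    q = gen_query(ev, bn)
+    lw = likelihood_weighting(q, ev, bn, NUM_SAMPLES)
+    gs, _ = gibbs_sampling(q, ev, bn, NUM_SAMPLES)
+    mh = metropolis_hastings(q, ev, bn, NUM_SAMPLES, ALPHA)
+    return lw, gs, mh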

 if __name__ == '__main__':
     main()
\ No newline at end of file