Added all functions. Results not correct.

2021-09-21 16:39:47 -05:00 · 2021-09-21 16:39:47 -05:00 · da54e609f8
commit da54e609f8
parent b6d03c3a7f
2 changed files with 298 additions and 30 deletions
--- a/1
+++ b/1
@ -0,0 +1 @@
+Subproject commit 9e3e13fe6b2b982a12a5a3f391f91107e95f13b8
--- a/main.py
+++ b/main.py
@ -6,37 +6,64 @@
 import json
 import random
 import math
-import sys
 import os
-
-#pwd = os.getcwd()
-#sys.path.append(pwd +'/gen-bn/')
-#sys.path.append("./gen-bn")
+import numpy as np
+from shutil import copyfile

 import gen_bn.gen_bn

-#from collections import defaultdict
+NETWORK_SIZE = 20
+NETWORK_TYPE = "polytree"
+NUM_SAMPLES = 200
+ALPHA = 0.75

 def main():
    #Generate a new BN. Specify type and number of nodes in network
-    gen_json("dag", 5)
+    print("Generating a Bayesian Network of type", NETWORK_TYPE, "and with", NETWORK_SIZE, "nodes.")
+    gen_json(NETWORK_TYPE, NETWORK_SIZE)
    
    #Get our BN
    bayes_net = import_bayes()
    
    #Generate random evidence
    E = gen_ev(bayes_net)
-    
+    print("Generating random evidence:", E)
+
    #Generate a random query that is not an evidence variable in the form of {var : val}
    X = gen_query(E, bayes_net)
-
-    #Get W from LW
-    W = likelihood_weighting(X, E, bayes_net, 10)
-    
-    #Print if desired
+    print("Generating random query:", X)
    print()
-    for key, value in W.items():
-        print(key, ' : ', value)
+
+    #Get probability of query from LW given evidence
+    LW_prob = likelihood_weighting(X, E, bayes_net, NUM_SAMPLES)
+    print("Probability of", X, "given", E, "with the LW algorithm and", NUM_SAMPLES, "samples is:")
+    print(LW_prob)
+
+    GS_prob, tmp = gibbs_sampling(X, E, bayes_net, NUM_SAMPLES)
+   
+    print("Probability of", X, "given", E, "with the GS algorithm and", NUM_SAMPLES, "samples is:")
+    print(GS_prob)
+
+    #Get probability of query from MH given evidence
+    MH_prob = metropolis_hastings(X, E, bayes_net, NUM_SAMPLES, ALPHA)
+    print("Probability of", X, "given", E, "with the MH algorithm and", NUM_SAMPLES, "samples, and a", ALPHA*100, "/", 100-(ALPHA*100),  "split is:")
+    print(MH_prob)
+
+    query_var =  (list(X.items())[0][0])
+    run_exact(query_var)
+
+def run_exact(query):
+    #Get dir of exact_inference
+    os.chdir("./exact_inference")
+    dirname = os.getcwd()
+    #Go back up and into the gen_bn folder
+    os.chdir("..")
+    os.chdir("./gen_bn")
+    #copy the file to the exact_inference folder
+    copyfile("bn.json", dirname+'/bn.json')
+    os.chdir("../exact_inference")
+    #Run the exact_inference on the query variable
+    os.system('python exact_inference.py -f bn.json' + ' ' + '-q' + ' ' + str(query))

 #Generate a new BN.
 #Input: Type ("dag", or "polytree")
@ -57,11 +84,11 @@ def import_bayes():
 def gen_ev(bayes_net):
    total_nodes = len(bayes_net)
    #Arbitrarily, let's only generate total_nodes/2 (rounded up) evidence variables at most, but at least 1
-    num_ev = random.randint(0, int(math.ceil(total_nodes/2)))
+    num_ev = random.randint(1, int(math.ceil(total_nodes/2)))
    fixed_ev = []
    #Go through and generate nodes that will be fixed
    for i in range(num_ev):
-        fixed_var = random.randint(1, total_nodes-1)
+        fixed_var = random.randint(0, total_nodes-1)
        if fixed_var not in fixed_ev:
            fixed_ev.append(fixed_var)
    #Now generate random values for the ev
@ -82,7 +109,7 @@ def gen_query(ev, bayes_net):
    for e in ev_vars:
        if int(e) in possible_vars:
            possible_vars.remove(int(e))
-    query = random.choice(possible_vars)
+    query = str(random.choice(possible_vars))
    rand_prob = random.random()
    if rand_prob >= 0.5:
        val = True
@ -91,7 +118,7 @@ def gen_query(ev, bayes_net):
    
    return {query : val}

-#Checks if node has parents
+#Checks if node has parents. If not, it is a root
 def is_root(node, BN):
    return (BN[node]["parents"]) == []

@ -115,7 +142,7 @@ def get_parents(node, BN):
 #E is a dict in the form of {"Node" : Value}

 #Compute the estimate of P(X|e), where X is the query variable, and e is the observed value for variables E
-def likelihood_weighting(X, e, bayes_net, num_samples):
+def likelihood_weighting(X, e, bayes_net, num_samples, MH=0):
    W = {}

    for i in range(num_samples):
@ -166,12 +193,18 @@ def likelihood_weighting(X, e, bayes_net, num_samples):
        #If the sample wasn't already in W, put it in there now    
        if not written:                 
            W[len(W)] = {w : samples}
-    return W
-
+    
+    #use for MH alg
+    if(MH is not 0):
+        return W 
+    
+    prob = compute_prob(X, e, W)
+    
+    return prob

 #Return the probability of a node and the value dict, given the current evidence and fixed evidence
 #Uses recursion to pull probabilites and values down through the network
-def get_probability(node, samples, ev, bayes_net, w):
+def get_probability(node, samples, ev, bayes_net, w, G=0):
    parents = get_parents(node, bayes_net)
    for parent in parents:
        #If we already know the value of the parent, no need to reobtain
@ -192,11 +225,15 @@ def get_probability(node, samples, ev, bayes_net, w):
                samples, prob,  w = translate_ev(gparents, parent, ev, samples, bayes_net, w)
    
    #Now that we have all the parents' values, we can get the node value, probability, and update samples
-    samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w)
+    samples, prob, w = translate_ev(parents, node, ev, samples, bayes_net, w, G)
    return samples, prob, w

 #Given a node and its parents, determine the node's value and it's probability
-def translate_ev(parents, node, ev, samples, bayes_net, w):
+def translate_ev(parents, node, ev, samples, bayes_net, w, G=0):
+    #If G=1, it means we already set the state space to specific values.
+    #Meaning, we only need to compute the prob of that happening, NOT set values
+    #Call a different function instead
+
    #Sort in ascending order
    parents.sort()
    node = str(node)
@ -227,13 +264,243 @@ def translate_ev(parents, node, ev, samples, bayes_net, w):
        w = w * table_prob

    return samples, table_prob, w
-      

-def gibbs_sampling():
-    print("Hello")
+def get_children(node, BN):
+    children = []
+    for x in range(len(BN)):
+        if node in BN[str(x)]["parents"]:
+            children.append(str(x))
+    return children

-def metropolis_hastings():
-    print("Hello")
+def product(nums):
+    result = 1
+    for num in nums:
+        result *= num
+    return result
+
+#Given a node and state_space values, get prob from CPT
+def get_cpt(node, samples, bayes_net):
+    parents = bayes_net[str(node)]["parents"]
+    parents.sort()
+    value_list = []
+
+    for parent in parents: 
+        value = samples[str(parent)]
+        value_list.append(value)
+    
+    for i in range(2**len(parents)):
+        #If the truth table matches the value combination we have
+        if bayes_net[str(node)]["prob"][i][0] == value_list:
+            return bayes_net[str(node)]["prob"][i][1]
+
+
+def mb(x_node, e, bayes_net, state_space):
+    #x_node = (list(X.items())[0][0])
+    #print("NODE IS", x_node)
+    state_space[x_node] = True
+    #print("True SS")
+    #print(state_space)
+    x_true = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
+    #print(x_true)
+    state_space[x_node] = False
+    #print("False SS")
+    #print(state_space)
+    x_false = get_cpt(x_node, state_space, bayes_net) * product(get_cpt(child, state_space, bayes_net) for child in get_children(x_node, bayes_net))
+    #print(x_false)
+    #print()
+    return x_true, x_false
+
+#Given a query, the evidence, and a dict of samples, compute the prob of the query
+def compute_prob(X, e, W):
+    #Combine the query and ev dicts for easier computation
+    combined_dict = {**X, **e}
+    #Initialize the probability of the query
+    query_prob = 0
+    ev_prob = 0
+    #print(W)
+   #Go through all the samples
+    for i in range(len(W)):
+        #Easy way to find all the instances where query var and ev match the samples - use subsets
+        #Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
+        #Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
+        if combined_dict.items() <= list(W[i].values())[0].items():
+            #Get the weight
+            query_prob+=list(W[i].items())[0][0]
+        #See if evidence is a subset
+        if e.items() <= list(W[i].values())[0].items():
+            ev_prob+=list(W[i].items())[0][0]
+   
+    if(ev_prob==0):
+        return 0
+    else:
+        return (query_prob/ev_prob)
+
+def gibbs_sampling(X, e, bayes_net, num_samples):
+    #State of the network, init'd with the query and ev
+    #x = {**X, **e}
+    x = {**e}
+
+    #Counts for each value of X
+    C = {}
+    for j in range(len(bayes_net)):
+        C[str(j)] = 0
+    #Get a list of non-ev variables to make things easier
+    Z = []
+    for node in bayes_net:
+        #if node not in e and node not in X:
+        if node not in e:
+            Z.append(node)
+    
+    #Initialize the rest of the network randomly
+    for z in Z:
+        rand_prob = random.random()
+        if rand_prob >=0.5:
+            x[z] = True
+        else:
+            x[z] = False
+
+    for k in range(num_samples):
+        zi = random.choice(Z)
+        #Reuse the get_prob function, even though we don't care about maintaining a samples list or weight
+        #samples, prob, w = get_probability(str(zi), {}, e, bayes_net, 0)
+        z_true, z_false = mb(str(zi), e, bayes_net, x)
+        #Gen a random number to determine the value based on probability
+        value_prob = random.random()
+        if value_prob >= z_true:
+            x[zi] = True
+            C[zi]+=1
+
+        else:
+            x[zi] = False
+
+    for j in range(len(bayes_net)):
+        C[str(j)]/=num_samples
+
+    query_var =  (list(X.items())[0][0])
+    query_val =  (list(X.values())[0])
+
+    #print(C)
+    if(query_val):
+        GS_Prob = C[str(query_var)]
+    else:
+        GS_Prob = 1.00000 - C[str(query_var)]
+
+    return GS_Prob, x
+
+#Function for using log-probabilities for preventing the underflow from MH as advised by Dr. Sen
+def compute_log(prob):
+    return -0.5 * np.sum(prob ** 2)
+
+def acceptance(xprime, x, compute_log):
+    return min(1, np.exp(compute_log(xprime) - compute_log(x)))
+
+def metropolis_hastings(X, e, bayes_net, num_samples, alpha):
+    state_space = {**e}
+    Z = []
+    
+    #Counts for each value of X
+    W = {}
+    C = {}
+    for j in range(len(bayes_net)):
+        C[str(j)] = 0
+
+    for node in bayes_net:
+        #if node not in e and node not in X:
+        if node not in e:
+            Z.append(node)
+    
+    #Initialize the rest of the network randomly
+    for z in Z:
+        rand_prob = random.random()
+        if rand_prob >=0.5:
+            state_space[z] = True
+        else:
+            state_space[z] = False
+   
+    #3 runs: 95% to run Gibbs for x', 5% for weighted sample. Then 85/15, 75/25
+    #100% acceptance
+    for num in range(num_samples):
+        proposal_choice = random.random()
+        
+        #Weighted-Sample
+        if(proposal_choice <= (1-alpha)):
+            tmp = likelihood_weighting(X, e, bayes_net, 1, 1)
+            sample = list(tmp[0].values())[0]
+            
+            #If this sample is already in W, don't add a new sample - only adjust the weight
+            written = False
+            for k in range(len(W)):
+                #If sample is in W
+                if sample in W[k].values():
+                    #Pull the weight that's associated with the sample
+                    key = list(W[k].items())[0][0]
+                    #Increment count  
+                    new_key = key + 1
+                    #Store it all back into W
+                    W[k] = {new_key : sample}
+                    #Make note that we've already written to W in this loop, so we don't write it again
+                    written = True
+
+            #If the sample wasn't already in W, put it in there now    
+            if not written:                 
+                W[len(W)] = {1 : sample}
+        
+            #for key, value in W.items():
+            #    print(key, ' : ', value)
+            #zi = random.choice(Z)
+            #state_space, prob, w = get_probability(zi, state_space, bayes_net, 0)
+        #Gibbs
+        else:
+            zi = random.choice(Z)
+            #Reuse the get_prob function, even though we don't care about maintaining a samples list or weight
+            #samples, prob, w = get_probability(str(zi), {}, e, bayes_net, 0)
+            z_true, z_false = mb(str(zi), e, bayes_net, state_space)
+            #Gen a random number to determine the value based on probability
+            value_prob = random.random()
+            if value_prob >= z_true:
+                state_space[zi] = True
+                C[zi]+=1
+
+            else:
+                state_space[zi] = False
+
+            #If this sample is already in W, don't add a new sample - only adjust the weight
+            written = False
+            for tmp in range(len(W)):
+                #If sample is in W
+                if state_space in W[tmp].values():
+                    #Pull the weight that's associated with the sample
+                    key = list(W[tmp].items())[0][0]
+                    #Increment count 
+                    new_key = key + 1
+                    #Store it all back into W
+                    W[tmp] = {new_key : state_space}
+                    #Make note that we've already written to W in this loop, so we don't write it again
+                    written = True
+
+            #If the sample wasn't already in W, put it in there now    
+            if not written:                 
+                W[len(W)] = {1 : state_space}
+
+    #for key, value in W.items():
+    #    print(key, ' : ', value)    
+    
+    #Combine the query and ev dicts for easier computation
+    combined_dict = {**X, **e}
+    #Initialize the probability of the query
+    query_prob = 0
+    #Go through all the samples
+    for i in range(len(W)):
+        #Easy way to find all the instances where query var and ev match the samples - use subsets
+        #Convert W[index].values() to a list, since Python 3 returns a "view" of the dictionary with .values()
+        #Then remove the list aspect through [0], and obtain the values from the nested dict through .items()
+        if combined_dict.items() <= list(W[i].values())[0].items():
+            #Get the weight
+            query_prob+=list(W[i].items())[0][0]
+    
+    return (query_prob/num_samples)
+    #MH_prob = compute_prob(X, e, W)
+    #return MH_prob

 if __name__ == '__main__':
    main()
				`@ -0,0 +1 @@`
				`Subproject commit 9e3e13fe6b2b982a12a5a3f391f91107e95f13b8`