# Source code for dynamicgem.experiments.exp

# Prefer the C-accelerated cPickle on Python 2; on Python 3 plain pickle
# is already C-backed.
try:
    import cPickle as pickle
except ImportError:  # narrow except: anything else should not be silenced
    import pickle
from time import time
from argparse import ArgumentParser
import importlib
import json
import networkx as nx
import itertools
import pdb
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, './')

from dynamicgem.graph_generation import dynamic_SBM_graph
from dynamicgem.utils      import graph_util, plot_util
from dynamicgem.evaluation.evaluate_graph_reconstruction import expGR
from dynamicgem.evaluation.evaluate_link_prediction import expLP

methClassMap = {"dynAE": "DynAE",
                "dynAERNN": "DynAERNN",
                "dynRNN": "DynRNN",
                "rand": "RandDynamic",
                }
expMap = {"gf": "GF MAP", "lp": "LP MAP",
          "nc": "NC MAP"}


def learn_emb(MethObj, graphs, params, res_pre, m_summ):
    """Learn (or reload) an embedding for a graph sequence and cache it on disk.

    Attributes:
        MethObj (obj): Object of the algorithm class
        graphs (list): Sequence of networkx graph snapshots
        params (dict): Dictionary of parameters necessary for running the
            experiment; reads "experiments" and "load_emb"
        res_pre (str): Prefix of the filename for saving the result.
        m_summ (str): summary added to the filename of the result.

    Returns:
        ndarray: Learned embedding, or None when only link prediction is
            requested (expLP handles its own embedding learning).
    """
    # Link-prediction-only runs skip embedding learning entirely.
    if params["experiments"] == ["lp"]:
        return None
    print('Learning Embedding: %s' % m_summ)
    emb_file = '%s_%s_%d.emb' % (res_pre, m_summ, len(graphs))
    time_file = '%s_%s_%d.learnT' % (res_pre, m_summ, len(graphs))
    if not bool(int(params["load_emb"])):
        X, learn_t = MethObj.learn_embeddings(graphs)
        print('\tTime to learn embedding: %f sec' % learn_t)
        # Context managers so the cache files are closed even if dump fails
        # (the original leaked every file handle it opened).
        with open(emb_file, 'wb') as f:
            pickle.dump(X, f)
        with open(time_file, 'wb') as f:
            pickle.dump(learn_t, f)
    else:
        with open(emb_file, 'rb') as f:
            X = pickle.load(f)
        # Timing info is optional; a missing .learnT file is not an error.
        try:
            with open(time_file, 'rb') as f:
                learn_t = pickle.load(f)
            print('\tTime to learn emb.: %f sec' % learn_t)
        except IOError:
            print('\tTime info not found')
    return X
def run_exps(MethObj, meth, dim, graphs, data_set, params):
    """Run the configured experiments for one method at one embedding dimension.

    Attributes:
        MethObj (obj): Object of the algorithm class
        meth (str): Name of the method
        dim (int): Dimension of the embedding
        graphs (Object): Networkx Graph Object
        data_set (str): Name of the dataset to be used for the experiment
        params (dict): Dictionary of parameters necessary for running the experiment

    Returns:
        tuple: graph-reconstruction and link-prediction results for the
            evaluated (second-half) snapshots.
    """
    m_summ = '%s_%d' % (meth, dim)
    res_pre = "results/%s" % data_set
    n_r = params["rounds"]
    n_snapshots = len(graphs)
    half = n_snapshots // 2
    n_eval = n_snapshots - half
    # One embedding per evaluated snapshot, learned from all earlier graphs.
    X = [
        learn_emb(MethObj, graphs[:snap], params, res_pre, m_summ)
        for snap in range(half, n_snapshots)
    ]
    gr = [0] * n_eval
    lp = [0] * n_eval
    if "gr" in params["experiments"]:
        for snap in range(half, n_snapshots):
            gr[snap - half] = expGR(
                graphs[snap], MethObj, X[snap - half],
                params["n_sample_nodes"], n_r, res_pre, m_summ,
                file_suffix=data_set + '_' + str(dim),
                is_undirected=params["is_undirected"],
                sampling_scheme=params["samp_scheme"]
            )
    if "lp" in params["experiments"]:
        lp = expLP(graphs, MethObj, n_r, res_pre, m_summ,
                   params["n_sample_nodes"],
                   is_undirected=params["is_undirected"],
                   sampling_scheme=params["samp_scheme"])
    return gr, lp
def get_max(val, val_max, idx, idx_max):
    """Return the (value, index) pair of whichever candidate is larger.

    The running maximum is kept unless the new value strictly exceeds it.
    """
    return (val, idx) if val > val_max else (val_max, idx_max)
def choose_best_hyp(data_set, graphs, params):
    """Grid-search hyperparameters for every method and store the best ones.

    Attributes:
        data_set (str): Name of the dataset to be used for the experiment
        graphs (Object): Networkx Graph Object
        params (dict): Dictionary of parameters necessary for running the experiment
    """
    # Load range of hyper parameters to test on.
    # NOTE(review): "%s_hypRange2.conf" differs from the "%s_hypRange.conf"
    # the plotting helpers read — confirm the "2" suffix is intentional.
    try:
        with open('experiments/config/%s_hypRange2.conf' % data_set, 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)
    # Test each hyperparameter combination for each method and keep the best.
    for meth in params["methods"]:
        dim = 128
        MethClass = getattr(
            importlib.import_module("embedding.%s" % meth),
            methClassMap[meth]
        )
        meth_hyp_range = model_hyp_range[meth]
        # Materialize once: dict views cannot be concatenated with lists
        # on Python 3, and we reuse the key order several times below.
        hyp_keys = list(meth_hyp_range.keys())
        gr_max, lp_max = 0, 0
        gr_hyp, lp_hyp = {meth: {}}, {meth: {}}
        ev_cols = ["GR MAP", "LP MAP"]
        hyp_df = pd.DataFrame(columns=hyp_keys + ev_cols + ["Round Id"])
        hyp_r_idx = 0
        for hyp in itertools.product(*meth_hyp_range.values()):
            hyp_d = {"d": dim}
            hyp_d.update(dict(zip(hyp_keys, hyp)))
            print(hyp_d)
            if meth in ("dynAE", "dynRNN", "dynAERNN"):
                # Autoencoder methods persist their model/weight files here.
                hyp_d.update({
                    "modelfile": [
                        "./intermediate/encoder_model_%s_%d.json" % (data_set, dim),
                        "./intermediate/decoder_model_%s_%d.json" % (data_set, dim)
                    ],
                    "weightfile": [
                        "./intermediate/encoder_weights_%s_%d.hdf5" % (data_set, dim),
                        "./intermediate/decoder_weights_%s_%d.hdf5" % (data_set, dim)
                    ]
                })
            MethObj = MethClass(hyp_d)
            gr, lp = run_exps(MethObj, meth, dim, graphs, data_set, params)
            gr_m, lp_m = np.mean(gr), np.mean(lp)
            gr_max, gr_hyp[meth] = get_max(gr_m, gr_max, hyp_d, gr_hyp[meth])
            lp_max, lp_hyp[meth] = get_max(lp_m, lp_max, hyp_d, lp_hyp[meth])
            hyp_df_row = dict(zip(hyp_keys, hyp))
            for r_id in range(params["rounds"]):
                hyp_df.loc[hyp_r_idx, hyp_keys] = pd.Series(hyp_df_row)
                hyp_df.loc[hyp_r_idx, ev_cols + ["Round Id"]] = \
                    [np.mean(np.array(gr)), np.mean(np.array(lp)), r_id]
                hyp_r_idx += 1
        exp_param = params["experiments"]
        for exp in exp_param:
            hyp_df.to_hdf(
                "intermediate/%s_%s_%s_%s_hyp.h5" % (
                    data_set, meth, exp, params["samp_scheme"]
                ),
                "df"
            )
        plot_util.plot_hyp(hyp_keys, exp_param, meth, data_set,
                           s_sch=params["samp_scheme"])
        # Persist the winning hyperparameters for later evaluation runs.
        opt_hyp_f_pre = 'experiments/config/%s_%s_%s' % (
            data_set, meth, params["samp_scheme"]
        )
        if gr_max:
            with open('%s_gr.conf' % opt_hyp_f_pre, 'w') as f:
                f.write(json.dumps(gr_hyp, indent=4))
        if lp_max:
            with open('%s_lp.conf' % opt_hyp_f_pre, 'w') as f:
                f.write(json.dumps(lp_hyp, indent=4))
def call_plot_hyp(data_set, params):
    """Plot the result of the hyperparameter search.

    Attributes:
        data_set (str): Name of the dataset to be used for the experiment
        params (dict): Dictionary of parameters necessary for running the experiment
    """
    # Load the hyperparameter ranges that were searched; fall back to the
    # defaults. Context managers fix the leaked handles of json.load(open(...)).
    try:
        with open('experiments/config/%s_hypRange.conf' % data_set, 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)
    for meth in params["methods"]:
        meth_hyp_range = model_hyp_range[meth]
        exp_param = params["experiments"]
        # list() so the plotting code receives a real sequence, not a
        # Python 3 dict view.
        plot_util.plot_hyp(list(meth_hyp_range.keys()), exp_param, meth,
                           data_set, s_sch=params["samp_scheme"])
def call_plot_hyp_all(data_sets, params):
    """Plot the results of all the hyperparameters across datasets.

    Attributes:
        data_sets (list): Names of the datasets used for the experiment;
            the first one selects which hyperparameter-range file is read.
        params (dict): Dictionary of parameters necessary for running the experiment
    """
    # Load the searched hyperparameter ranges; fall back to the defaults.
    # Context managers fix the leaked handles of json.load(open(...)).
    try:
        with open('experiments/config/%s_hypRange.conf' % data_sets[0], 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)
    for meth in params["methods"]:
        meth_hyp_range = model_hyp_range[meth]
        exp_param = params["experiments"]
        # list() so the plotting code receives a real sequence, not a
        # Python 3 dict view.
        plot_util.plot_hyp_all(list(meth_hyp_range.keys()), exp_param, meth,
                               data_sets, s_sch=params["samp_scheme"])
def call_exps(params, data_set, n_graphs):
    """Run the experiments on one dataset.

    Attributes:
        params (dict): Dictionary of parameters necessary for running the experiment
        data_set (str): Name of the dataset to be used for the experiment
        n_graphs (int): Total number of graphs in a sequence.
    """
    # Load Dataset
    print('Dataset: %s' % data_set)
    if data_set == "sbm":
        # Synthetic dynamic SBM series: 500 nodes, 2 communities,
        # 5 nodes migrating per step.
        node_num = 500
        community_num = 2
        node_change_num = 5
        length = n_graphs
        sbm_gs = dynamic_SBM_graph.get_community_diminish_series_v2(
            node_num, community_num, length, 1, node_change_num
        )
        graphs = [g[0] for g in sbm_gs]
    else:
        graphs = []
        for t in range(n_graphs):
            G = nx.read_gpickle(
                'data/%s/graph_%d.gpickle' % (data_set, t)
            )
            # Keep only the largest connected component of each snapshot.
            G, nodeListMap = graph_util.get_lcc(G)
            graphs.append(G)
            print('Graph %d:' % t)
            graph_util.print_graph_stats(G)
    # Search through the hyperparameter space if requested.
    # bool(int(...)) matches how every other flag is parsed, so the CLI
    # string "0" is correctly treated as False (bare truthiness was not).
    if bool(int(params["find_hyp"])):
        choose_best_hyp(data_set, graphs, params)
    # Load the best hyperparameters and evaluate again on the data.
    for d, meth, exp in itertools.product(
            params["dimensions"], params["methods"], params["experiments"]):
        dim = int(d)
        MethClass = getattr(
            importlib.import_module("embedding.%s" % meth),
            methClassMap[meth]
        )
        opt_hyp_f_pre = 'experiments/config/%s_%s_%s' % (
            data_set, meth, params["samp_scheme"]
        )
        # Context managers fix the leaked handles of json.load(open(...)).
        try:
            with open('%s_%s.conf' % (opt_hyp_f_pre, exp), 'r') as f:
                model_hyp = json.load(f)
        except IOError:
            print('Default hyperparameter of the method chosen')
            with open('experiments/config/%s.conf' % meth, 'r') as f:
                model_hyp = json.load(f)
        hyp = {}
        hyp.update(model_hyp[meth])
        hyp.update({"d": dim})
        if meth in ("dynAE", "dynAERNN", "dynRNN"):
            # Autoencoder methods persist their model/weight files here.
            hyp.update({
                "modelfile": [
                    "./intermediate/encoder_model_%s_%d.json" % (data_set, dim),
                    "./intermediate/decoder_model_%s_%d.json" % (data_set, dim)
                ],
                "weightfile": [
                    "./intermediate/encoder_weights_%s_%d.hdf5" % (data_set, dim),
                    "./intermediate/decoder_weights_%s_%d.hdf5" % (data_set, dim)
                ]
            })
        elif meth in ("gf", "node2vec"):
            hyp.update({"data_set": data_set})
        MethObj = MethClass(hyp)
        run_exps(MethObj, meth, dim, graphs, data_set, params)
if __name__ == '__main__':
    ''' Sample usage
    python experiments/exp.py -data sbm -dim 128 -meth sdne -exp gr,lp
    '''
    t1 = time()
    parser = ArgumentParser(description='Graph Embedding Experiments')
    parser.add_argument('-data', '--data_sets',
                        help='dataset names (default: sbm)')
    parser.add_argument('-dim', '--dimensions',
                        help='embedding dimensions list(default: 2^1 to 2^8)')
    parser.add_argument('-meth', '--methods',
                        help='method list (default: all methods)')
    parser.add_argument('-exp', '--experiments',
                        help='exp list (default: gr,lp)')
    parser.add_argument('-lemb', '--load_emb',
                        help='load saved embeddings (default: False)')
    parser.add_argument('-lexp', '--load_exp',
                        help='load saved experiment results (default: False)')
    parser.add_argument('-rounds', '--rounds',
                        help='number of rounds (default: 5)')
    parser.add_argument('-plot', '--plot',
                        help='plot the results (default: True)')
    parser.add_argument('-plot_d', '--plot_d',
                        help='plot the results wrt dims(default: True)')
    parser.add_argument('-hyp_plot', '--hyp_plot',
                        help='plot the hyperparameter results (default: True)')
    parser.add_argument('-hyp_plot_all', '--hyp_plot_all',
                        help='plot the hyperparameter results (all) (default: True)')
    parser.add_argument('-find_hyp', '--find_hyp',
                        help='find best hyperparameters (default: False)')
    parser.add_argument('-saveMAP', '--save_MAP',
                        help='save MAP in a latex table (default: False)')
    parser.add_argument('-n_samples', '--n_sample_nodes',
                        help='number of sampled nodes (default: 1024)')
    parser.add_argument('-s_sch', '--samp_scheme',
                        help='sampling scheme (default: u_rand)')
    parser.add_argument('-n_graphs', '--n_graphs',
                        help='# of graphs (default: 5)')
    # Config file supplies defaults; close the handle deterministically.
    with open('experiments/config/params.conf', 'r') as f:
        params = json.load(f)
    args = vars(parser.parse_args())
    print(args)
    # CLI values override config defaults. .items() replaces the
    # Python-2-only .iteritems(), which raised AttributeError on Python 3.
    for k, v in args.items():
        if v is not None:
            params[k] = v
    params["experiments"] = params["experiments"].split(',')
    params["data_sets"] = params["data_sets"].split(',')
    params["rounds"] = int(params["rounds"])
    params["n_sample_nodes"] = int(params["n_sample_nodes"])
    params["is_undirected"] = bool(int(params["is_undirected"]))
    params["plot_d"] = bool(int(params["plot_d"]))
    params["plot"] = bool(int(params["plot"]))
    params["hyp_plot"] = bool(int(params["hyp_plot"]))
    params["hyp_plot_all"] = bool(int(params["hyp_plot_all"]))
    # Number of evaluated snapshots: the second half of the sequence.
    t_pred = int(params["n_graphs"]) - int(params["n_graphs"]) // 2
    if params["methods"] == "all":
        # list() so the value is a real list (a Py3 dict view cannot be
        # indexed and would surprise downstream code).
        params["methods"] = list(methClassMap.keys())
    else:
        params["methods"] = params["methods"].split(',')
    params["dimensions"] = params["dimensions"].split(',')
    print(params)
    for data_set in params["data_sets"]:
        if not int(params["load_exp"]):
            call_exps(params, data_set, int(params["n_graphs"]))
        if int(params["plot"]):
            res_pre = "results/%s" % data_set
            plot_util.plotExpRes(res_pre, params["methods"],
                                 params["experiments"], params["dimensions"],
                                 'plots/%s_%s' % (data_set,
                                                  params["samp_scheme"]),
                                 params["rounds"], params["plot_d"], t_pred,
                                 params["samp_scheme"])
        if int(params["hyp_plot"]):
            call_plot_hyp(data_set, params)
        if int(params["hyp_plot_all"]):
            call_plot_hyp_all(params["data_sets"], params)