try: import cPickle as pickle
except: import pickle
from time import time
from argparse import ArgumentParser
import importlib
import json
import networkx as nx
import itertools
import pdb
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, './')
from dynamicgem.graph_generation import dynamic_SBM_graph
from dynamicgem.utils import graph_util, plot_util
from dynamicgem.evaluation.evaluate_graph_reconstruction import expGR
from dynamicgem.evaluation.evaluate_link_prediction import expLP
# Maps a method name (as listed in params["methods"]) to the name of the class
# that is looked up in that method's module via getattr().
methClassMap = {
    "dynAE": "DynAE",
    "dynAERNN": "DynAERNN",
    "dynRNN": "DynRNN",
    "rand": "RandDynamic",
}

# Labels keyed by experiment code; not referenced in the visible part of
# this file.
expMap = {
    "gf": "GF MAP",
    "lp": "LP MAP",
    "nc": "NC MAP",
}
def learn_emb(MethObj, graphs, params, res_pre, m_summ):
    """Learn an embedding for a graph sequence, or load a cached one.

    Nothing is learned or loaded when link prediction is the only requested
    experiment (``params["experiments"] == ["lp"]``).

    Args:
        MethObj (obj): Embedding method exposing ``learn_embeddings(graphs)``,
            which must return ``(embedding, learning_time)``.
        graphs (list): Sequence of graphs to embed; its length is part of the
            cache file name.
        params (dict): Experiment parameters. Reads ``"experiments"`` and
            ``"load_emb"`` (0/1 flag: load the cached embedding instead of
            learning it).
        res_pre (str): Prefix of the file names used for saving the result.
        m_summ (str): Method summary added to the file names.

    Returns:
        The learned (or loaded) embedding, or ``None`` in the "lp"-only case.

    Raises:
        IOError: If loading is requested but the ``.emb`` file is missing.
    """
    if params["experiments"] == ["lp"]:
        return None

    print('Learning Embedding: %s' % m_summ)
    file_pre = '%s_%s_%d' % (res_pre, m_summ, len(graphs))
    emb_file = file_pre + '.emb'
    time_file = file_pre + '.learnT'

    if not bool(int(params["load_emb"])):
        X, learn_t = MethObj.learn_embeddings(graphs)
        print('\tTime to learn embedding: %f sec' % learn_t)
        # Context managers make sure the cache files are flushed and closed.
        with open(emb_file, 'wb') as f:
            pickle.dump(X, f)
        with open(time_file, 'wb') as f:
            pickle.dump(learn_t, f)
        return X

    # NOTE: pickle must only be used on trusted files; these are expected to
    # be the caches written by the branch above.
    with open(emb_file, 'rb') as f:
        X = pickle.load(f)
    try:
        with open(time_file, 'rb') as f:
            learn_t = pickle.load(f)
    except IOError:
        # The timing file is optional: the embedding is still usable.
        print('\tTime info not found')
    else:
        print('\tTime to learn emb.: %f sec' % learn_t)
    return X
def run_exps(MethObj, meth, dim, graphs, data_set, params):
    """Run the requested experiments for one method on a graph sequence.

    For every time step ``t`` in the second half of the sequence, an
    embedding is obtained from the graphs before ``t`` via ``learn_emb``.

    Args:
        MethObj (obj): Object of the algorithm class.
        meth (str): Name of the method.
        dim (int): Dimension of the embedding.
        graphs (list): Sequence of graphs.
        data_set (str): Name of the dataset; used in result file names.
        params (dict): Experiment parameters. Reads ``"rounds"``,
            ``"experiments"``, ``"n_sample_nodes"``, ``"is_undirected"`` and
            ``"samp_scheme"``.

    Returns:
        tuple: ``(gr, lp)``. ``gr`` holds one ``expGR`` result per evaluated
        time step and ``lp`` is the ``expLP`` result; an experiment that was
        not requested leaves a list of zeros in its slot.
    """
    m_summ = '%s_%d' % (meth, dim)
    res_pre = "results/%s" % data_set
    n_r = params["rounds"]

    T = len(graphs)
    first_t = T // 2
    n_steps = T - first_t

    # One embedding per evaluated step, learned from the graphs preceding it.
    X = [
        learn_emb(MethObj, graphs[:t], params, res_pre, m_summ)
        for t in range(first_t, T)
    ]

    gr = [0] * n_steps
    lp = [0] * n_steps
    requested = params["experiments"]

    if "gr" in requested:
        for step, t in enumerate(range(first_t, T)):
            gr[step] = expGR(
                graphs[t],
                MethObj,
                X[step],
                params["n_sample_nodes"],
                n_r,
                res_pre,
                m_summ,
                file_suffix=data_set + '_' + str(dim),
                is_undirected=params["is_undirected"],
                sampling_scheme=params["samp_scheme"],
            )

    if "lp" in requested:
        lp = expLP(
            graphs,
            MethObj,
            n_r,
            res_pre,
            m_summ,
            params["n_sample_nodes"],
            is_undirected=params["is_undirected"],
            sampling_scheme=params["samp_scheme"],
        )

    return gr, lp
def get_max(val, val_max, idx, idx_max):
    """Return the larger value together with its associated index.

    Args:
        val: Candidate value.
        val_max: Best value seen so far.
        idx: Object associated with ``val``.
        idx_max: Object associated with ``val_max``.

    Returns:
        tuple: ``(val, idx)`` when ``val`` is strictly greater than
        ``val_max``, otherwise ``(val_max, idx_max)``.
    """
    candidate_wins = val > val_max
    return (val, idx) if candidate_wins else (val_max, idx_max)
def choose_best_hyp(data_set, graphs, params):
    """Find the best hyperparameters of each method using a grid search.

    For every method in ``params["methods"]`` the cartesian product of its
    hyperparameter ranges is evaluated with ``run_exps``. All scores are
    stored in an HDF5 file per experiment and plotted, and the best setting
    for graph reconstruction / link prediction is written to a ``.conf``
    file when its mean score is non-zero.

    Args:
        data_set (str): Name of the dataset to be used for the experiment.
        graphs (list): Sequence of graphs.
        params (dict): Experiment parameters. Reads ``"methods"``,
            ``"rounds"``, ``"experiments"`` and ``"samp_scheme"``.

    Raises:
        IOError: If neither the dataset-specific nor the default range
            configuration file can be opened.
    """
    # Load range of hyper parameters to test on, preferring the
    # dataset-specific file over the default one.
    try:
        with open('experiments/config/%s_hypRange2.conf' % data_set, 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)

    ev_cols = ["GR MAP", "LP MAP"]

    # Test each hyperparameter for each method and store the best
    for meth in params["methods"]:
        dim = 128
        # NOTE(review): the module path is "embedding.<meth>" while the
        # file's static imports use the "dynamicgem." package prefix --
        # confirm this resolves in the intended working directory.
        MethClass = getattr(
            importlib.import_module("embedding.%s" % meth),
            methClassMap[meth]
        )
        meth_hyp_range = model_hyp_range[meth]
        # A real list: dict views cannot be concatenated with lists on
        # Python 3 and are not reliable pandas column indexers.
        hyp_names = list(meth_hyp_range.keys())
        gr_max, lp_max = 0, 0
        gr_hyp, lp_hyp = {meth: {}}, {meth: {}}

        hyp_df = pd.DataFrame(columns=hyp_names + ev_cols + ["Round Id"])
        hyp_r_idx = 0
        for hyp in itertools.product(*meth_hyp_range.values()):
            hyp_df_row = dict(zip(hyp_names, hyp))
            hyp_d = {"d": dim}
            hyp_d.update(hyp_df_row)
            print(hyp_d)
            if meth in ("dynAE", "dynRNN", "dynAERNN"):
                hyp_d.update({
                    "modelfile": [
                        "./intermediate/encoder_model_%s_%d.json" % (data_set, dim),
                        "./intermediate/decoder_model_%s_%d.json" % (data_set, dim)
                    ],
                    "weightfile": [
                        "./intermediate/encoder_weights_%s_%d.hdf5" % (data_set, dim),
                        "./intermediate/decoder_weights_%s_%d.hdf5" % (data_set, dim)
                    ]
                })
            MethObj = MethClass(hyp_d)
            gr, lp = run_exps(MethObj, meth, dim, graphs, data_set, params)
            gr_m, lp_m = np.mean(gr), np.mean(lp)
            gr_max, gr_hyp[meth] = get_max(gr_m, gr_max, hyp_d, gr_hyp[meth])
            lp_max, lp_hyp[meth] = get_max(lp_m, lp_max, hyp_d, lp_hyp[meth])
            # One row per round; every round carries the same mean scores.
            for r_id in range(params["rounds"]):
                hyp_df.loc[hyp_r_idx, hyp_names] = pd.Series(hyp_df_row)
                hyp_df.loc[hyp_r_idx, ev_cols + ["Round Id"]] = \
                    [gr_m, lp_m, r_id]
                hyp_r_idx += 1

        exp_param = params["experiments"]
        for exp in exp_param:
            hyp_df.to_hdf(
                "intermediate/%s_%s_%s_%s_hyp.h5" % (
                    data_set, meth, exp, params["samp_scheme"]
                ),
                key="df"
            )
        plot_util.plot_hyp(hyp_names, exp_param,
                           meth, data_set, s_sch=params["samp_scheme"])

        # Store the best hyperparameter
        opt_hyp_f_pre = 'experiments/config/%s_%s_%s' % (
            data_set,
            meth,
            params["samp_scheme"]
        )
        if gr_max:
            with open('%s_gr.conf' % opt_hyp_f_pre, 'w') as f:
                f.write(json.dumps(gr_hyp, indent=4))
        if lp_max:
            with open('%s_lp.conf' % opt_hyp_f_pre, 'w') as f:
                f.write(json.dumps(lp_hyp, indent=4))
def call_plot_hyp(data_set, params):
    """Plot the result of the hyperparameter search for one dataset.

    Args:
        data_set (str): Name of the dataset to be used for the experiment.
        params (dict): Experiment parameters. Reads ``"methods"``,
            ``"experiments"`` and ``"samp_scheme"``.

    Raises:
        IOError: If neither the dataset-specific nor the default range
            configuration file can be opened.
    """
    # Load range of hyper parameters tested on to plot
    try:
        with open('experiments/config/%s_hypRange.conf' % data_set, 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)

    for meth in params["methods"]:
        # Pass a real list: on Python 3 dict.keys() is only a view.
        hyp_names = list(model_hyp_range[meth].keys())
        exp_param = params["experiments"]
        plot_util.plot_hyp(hyp_names, exp_param,
                           meth, data_set,
                           s_sch=params["samp_scheme"])
def call_plot_hyp_all(data_sets, params):
    """Plot the result of the hyperparameter search across all datasets.

    The hyperparameter ranges are taken from the configuration of the first
    dataset, falling back to the default range file.

    Args:
        data_sets (list): Names of the datasets; must not be empty.
        params (dict): Experiment parameters. Reads ``"methods"``,
            ``"experiments"`` and ``"samp_scheme"``.

    Raises:
        IOError: If neither the dataset-specific nor the default range
            configuration file can be opened.
        IndexError: If ``data_sets`` is empty.
    """
    # Load range of hyper parameters tested on to plot
    try:
        with open('experiments/config/%s_hypRange.conf' % data_sets[0], 'r') as f:
            model_hyp_range = json.load(f)
    except IOError:
        with open('experiments/config/default_hypRange.conf', 'r') as f:
            model_hyp_range = json.load(f)

    for meth in params["methods"]:
        # Pass a real list: on Python 3 dict.keys() is only a view.
        hyp_names = list(model_hyp_range[meth].keys())
        exp_param = params["experiments"]
        plot_util.plot_hyp_all(hyp_names, exp_param,
                               meth, data_sets,
                               s_sch=params["samp_scheme"])
def call_exps(params, data_set, n_graphs):
    """Load a dataset and run the experiments for every requested setting.

    Args:
        params (dict): Experiment parameters. Reads ``"find_hyp"``,
            ``"dimensions"``, ``"methods"``, ``"experiments"`` and
            ``"samp_scheme"``.
        data_set (str): Name of the dataset. ``"sbm"`` generates a synthetic
            sequence; any other name is read from
            ``data/<data_set>/graph_<t>.gpickle``.
        n_graphs (int): Total number of graphs in the sequence.
    """
    # Load Dataset
    print('Dataset: %s' % data_set)
    if data_set == "sbm":
        node_num = 500
        community_num = 2
        node_change_num = 5
        length = n_graphs
        sbm_gs = dynamic_SBM_graph.get_community_diminish_series_v2(
            node_num,
            community_num,
            length,
            1,
            node_change_num
        )
        # Only the first element of each generated entry is used as graph.
        graphs = [g[0] for g in sbm_gs]
    else:
        graphs = []
        for t in range(n_graphs):
            # NOTE(review): nx.read_gpickle is not available in
            # NetworkX >= 3.0 -- confirm the pinned NetworkX version.
            G = nx.read_gpickle(
                'data/%s/graph_%d.gpickle' % (data_set, t)
            )
            G, _ = graph_util.get_lcc(G)
            graphs.append(G)
            print('Graph %d:' % t)
            graph_util.print_graph_stats(G)

    # Search through the hyperparameter space
    if params["find_hyp"]:
        choose_best_hyp(data_set, graphs, params)

    # Load best hyperparameter and test it again on new test data
    for d, meth, exp in itertools.product(
        params["dimensions"],
        params["methods"],
        params["experiments"]
    ):
        dim = int(d)
        # NOTE(review): the module path is "embedding.<meth>" while the
        # file's static imports use the "dynamicgem." package prefix --
        # confirm this resolves in the intended working directory.
        MethClass = getattr(
            importlib.import_module("embedding.%s" % meth),
            methClassMap[meth]
        )
        opt_hyp_f_pre = 'experiments/config/%s_%s_%s' % (
            data_set,
            meth,
            params["samp_scheme"]
        )
        try:
            with open('%s_%s.conf' % (opt_hyp_f_pre, exp), 'r') as f:
                model_hyp = json.load(f)
        except IOError:
            # No tuned configuration for this setting: use the defaults.
            print('Default hyperparameter of the method chosen')
            with open('experiments/config/%s.conf' % meth, 'r') as f:
                model_hyp = json.load(f)

        hyp = dict(model_hyp[meth])
        hyp["d"] = dim
        if meth in ("dynAE", "dynAERNN", "dynRNN"):
            hyp.update({
                "modelfile": [
                    "./intermediate/encoder_model_%s_%d.json" % (data_set, dim),
                    "./intermediate/decoder_model_%s_%d.json" % (data_set, dim)
                ],
                "weightfile": [
                    "./intermediate/encoder_weights_%s_%d.hdf5" % (data_set, dim),
                    "./intermediate/decoder_weights_%s_%d.hdf5" % (data_set, dim)
                ]
            })
        elif meth in ("gf", "node2vec"):
            hyp.update({"data_set": data_set})
        MethObj = MethClass(hyp)
        run_exps(MethObj, meth, dim, graphs, data_set, params)
if __name__ == '__main__':
    # Sample usage:
    #   python experiments/exp.py -data sbm -dim 128 -meth dynAE -exp gr,lp
    #
    # Defaults come from experiments/config/params.conf; every option given
    # on the command line overrides the value of the same name.
    t1 = time()
    parser = ArgumentParser(description='Graph Embedding Experiments')
    parser.add_argument('-data', '--data_sets',
                        help='dataset names (default: sbm)')
    parser.add_argument('-dim', '--dimensions',
                        help='embedding dimensions list(default: 2^1 to 2^8)')
    parser.add_argument('-meth', '--methods',
                        help='method list (default: all methods)')
    parser.add_argument('-exp', '--experiments',
                        help='exp list (default: gr,lp)')
    parser.add_argument('-lemb', '--load_emb',
                        help='load saved embeddings (default: False)')
    parser.add_argument('-lexp', '--load_exp',
                        help='load saved experiment results (default: False)')
    parser.add_argument('-rounds', '--rounds',
                        help='number of rounds (default: 5)')
    parser.add_argument('-plot', '--plot',
                        help='plot the results (default: True)')
    parser.add_argument('-plot_d', '--plot_d',
                        help='plot the results wrt dims(default: True)')
    parser.add_argument('-hyp_plot', '--hyp_plot',
                        help='plot the hyperparameter results (default: True)')
    parser.add_argument('-hyp_plot_all', '--hyp_plot_all',
                        help='plot the hyperparameter results (all) (default: True)')
    parser.add_argument('-find_hyp', '--find_hyp',
                        help='find best hyperparameters (default: False)')
    parser.add_argument('-saveMAP', '--save_MAP',
                        help='save MAP in a latex table (default: False)')
    parser.add_argument('-n_samples', '--n_sample_nodes',
                        help='number of sampled nodes (default: 1024)')
    parser.add_argument('-s_sch', '--samp_scheme',
                        help='sampling scheme (default: u_rand)')
    parser.add_argument('-n_graphs', '--n_graphs',
                        help='# of graphs (default: 5)')

    with open('experiments/config/params.conf', 'r') as f:
        params = json.load(f)
    args = vars(parser.parse_args())
    print(args)
    # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
    for k, v in args.items():
        if v is not None:
            params[k] = v

    # Normalise types: command-line values always arrive as strings.
    params["experiments"] = params["experiments"].split(',')
    params["data_sets"] = params["data_sets"].split(',')
    params["rounds"] = int(params["rounds"])
    params["n_sample_nodes"] = int(params["n_sample_nodes"])
    params["is_undirected"] = bool(int(params["is_undirected"]))
    params["plot_d"] = bool(int(params["plot_d"]))
    params["plot"] = bool(int(params["plot"]))
    params["hyp_plot"] = bool(int(params["hyp_plot"]))
    params["hyp_plot_all"] = bool(int(params["hyp_plot_all"]))
    # Without this, "-find_hyp 0" would be the truthy string "0".
    params["find_hyp"] = bool(int(params.get("find_hyp", 0)))
    t_pred = int(params["n_graphs"]) - int(params["n_graphs"]) // 2
    if params["methods"] == "all":
        params["methods"] = list(methClassMap)
    else:
        params["methods"] = params["methods"].split(',')
    params["dimensions"] = params["dimensions"].split(',')
    print(params)

    for data_set in params["data_sets"]:
        if not int(params["load_exp"]):
            call_exps(params, data_set, int(params["n_graphs"]))
        if int(params["plot"]):
            res_pre = "results/%s" % data_set
            plot_util.plotExpRes(res_pre, params["methods"],
                                 params["experiments"], params["dimensions"],
                                 'plots/%s_%s' % (data_set, params["samp_scheme"]),
                                 params["rounds"], params["plot_d"], t_pred,
                                 params["samp_scheme"])
        if int(params["hyp_plot"]):
            call_plot_hyp(data_set, params)
    # The combined plot covers every dataset, so it runs once after the loop.
    if int(params["hyp_plot_all"]):
        call_plot_hyp_all(params["data_sets"], params)