Source code for dynamicgem.evaluation.evaluate_link_prediction

try:
    import cPickle as pickle
except:
    import pickle
from dynamicgem.evaluation import metrics as metrics
from dynamicgem.utils import evaluation_util
from dynamicgem.utils import graph_util
import numpy as np
import networkx as nx
import pdb
import sys

sys.path.insert(0, './')
from dynamicgem.utils import embed_util


def evaluateDynamicLinkPrediction(graph,
                                  embedding,
                                  rounds,
                                  n_sample_nodes=None,
                                  no_python=False,
                                  is_undirected=True,
                                  sampling_scheme="u_rand"):
    """Evaluate dynamic link prediction against one graph snapshot.

    The embedding's predicted next adjacency matrix is converted to an edge
    list and scored against ``graph`` (or a sampled sub-graph of it).

    Args:
        graph (object): Networkx graph used as ground truth.
        embedding (object): Embedding algorithm; must provide
            ``predict_next_adj(node_l)``.
        rounds (int): Unused here; kept for interface compatibility.
        n_sample_nodes (int): Number of nodes to sample from ``graph``.
            When falsy (``None`` or ``0``) no sampling is done and the
            whole graph is used.
        no_python (bool): Unused here; kept for interface compatibility.
        is_undirected (bool): Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        sampling_scheme (str): ``"u_rand"`` selects
            ``graph_util.sample_graph``; any other value selects
            ``graph_util.sample_graph_rw_int``.

    Returns:
        tuple: ``(MAP, prec_curv)`` as produced by ``metrics.computeMAP``
        and ``metrics.computePrecisionCurve``.
    """
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            test_digraph, node_l = graph_util.sample_graph(
                graph,
                n_sample_nodes
            )
        else:
            test_digraph, node_l = graph_util.sample_graph_rw_int(
                graph,
                n_sample_nodes
            )
    else:
        # No sampling requested: evaluate on the full graph. Without this
        # branch test_digraph stayed unbound and the default call
        # (n_sample_nodes=None) failed with UnboundLocalError.
        test_digraph, node_l = graph, None

    estimated_adj = embedding.predict_next_adj(node_l)
    print(len(estimated_adj), np.shape(estimated_adj))

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )
    print(len(predicted_edge_list), np.shape(predicted_edge_list),
          len(test_digraph.edges()), np.shape(test_digraph.edges()))

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )
    return (MAP, prec_curv)
def evaluateDynamicLinkPrediction_TIMERS(graph,
                                         embedding,
                                         t,
                                         rounds,
                                         n_sample_nodes=None,
                                         no_python=False,
                                         is_undirected=True,
                                         sampling_scheme="u_rand"):
    """Evaluate dynamic link prediction for TIMERS-style embeddings.

    Same flow as the plain evaluator, except that the time step ``t`` is
    passed to ``embedding.predict_next_adj`` along with the node list.

    Args:
        graph (object): Networkx graph used as ground truth.
        embedding (object): Embedding algorithm; must provide
            ``predict_next_adj(t, node_l)``.
        t (int): Index of the graph in the sequence, forwarded to the
            embedding.
        rounds (int): Unused here; kept for interface compatibility.
        n_sample_nodes (int): Number of nodes to sample from ``graph``.
            When falsy (``None`` or ``0``) no sampling is done and the
            whole graph is used.
        no_python (bool): Unused here; kept for interface compatibility.
        is_undirected (bool): Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        sampling_scheme (str): ``"u_rand"`` selects
            ``graph_util.sample_graph``; any other value selects
            ``graph_util.sample_graph_rw_int``.

    Returns:
        tuple: ``(MAP, prec_curv)`` as produced by ``metrics.computeMAP``
        and ``metrics.computePrecisionCurve``.
    """
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            test_digraph, node_l = graph_util.sample_graph(
                graph,
                n_sample_nodes
            )
        else:
            test_digraph, node_l = graph_util.sample_graph_rw_int(
                graph,
                n_sample_nodes
            )
    else:
        # No sampling requested: evaluate on the full graph. Without this
        # branch test_digraph stayed unbound and the default call
        # (n_sample_nodes=None) failed with UnboundLocalError.
        test_digraph, node_l = graph, None

    estimated_adj = embedding.predict_next_adj(t, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )
    return (MAP, prec_curv)
def expLP(graphs,
          embedding,
          rounds,
          res_pre,
          m_summ,
          n_sample_nodes=1000,
          train_ratio_init=0.5,
          no_python=False,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Run the dynamic link prediction experiment over a graph sequence.

    For every step ``t`` from ``int(train_ratio_init * len(graphs))`` to the
    end of the sequence, the embedding is trained on ``graphs[:t]`` and
    evaluated ``rounds`` times against ``graphs[t]``. A summary line per
    step is appended to ``<res_pre><m_summ>.dlpsumm``.

    Args:
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``learn_embeddings`` and ``predict_next_adj``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample for evaluation
            (cast to ``int``).
        train_ratio_init (float): Fraction of the sequence used only for
            training before evaluation starts.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    T_min = int(train_ratio_init * T)
    n_steps = T - T_min
    MAP = [[None] * rounds for _ in range(n_steps)]
    prec_curv = [[None] * rounds for _ in range(n_steps)]

    for t in range(T_min, T):
        step = t - T_min
        embedding.learn_embeddings(graphs[:t])
        for r_id in range(rounds):
            MAP[step][r_id], prec_curv[step][r_id] = \
                evaluateDynamicLinkPrediction(graphs[t],
                                              embedding,
                                              rounds,
                                              n_sample_nodes=n_sample_nodes,
                                              no_python=no_python,
                                              is_undirected=is_undirected,
                                              sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                step,
                np.mean(MAP[step]),
                np.std(MAP[step]),
                metrics.getPrecisionReport(
                    prec_curv[step][0],
                    len(prec_curv[step][0])
                )
            ))
    return np.mean(np.array(MAP))
def exp_changedLP(graphs,
                  embedding,
                  rounds,
                  res_pre,
                  m_summ,
                  n_sample_nodes=1000,
                  train_ratio_init=0.5,
                  no_python=False,
                  is_undirected=True,
                  sampling_scheme="u_rand"):
    """Run the link prediction experiment restricted to changed links.

    For every step ``t`` from ``int(train_ratio_init * len(graphs))`` to the
    end of the sequence, the links that changed between ``graphs[t - 1]``
    and ``graphs[t]`` are computed, the embedding is trained on
    ``graphs[:t]`` and then evaluated ``rounds`` times on the changed
    links. A summary line per step is appended to
    ``<res_pre><m_summ>.dlpsumm``.

    Args:
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``learn_embeddings`` and ``predict_next_adj``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample (cast to ``int``
            and forwarded to the evaluator).
        train_ratio_init (float): Fraction of the sequence used only for
            training before evaluation starts.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    T_min = int(train_ratio_init * T)
    n_steps = T - T_min
    MAP = [[None] * rounds for _ in range(n_steps)]
    prec_curv = [[None] * rounds for _ in range(n_steps)]

    for t in range(T_min, T):
        step = t - T_min
        # NOTE(review): getchangedlinks is not defined or imported in the
        # visible part of this module - confirm it is in scope.
        edges_add, edges_rm = getchangedlinks(graphs[t - 1], graphs[t])
        embedding.learn_embeddings(graphs[:t])
        for r_id in range(rounds):
            MAP[step][r_id], prec_curv[step][r_id] = \
                evaluateDynamic_changed_LinkPrediction(
                    graphs[t],
                    embedding,
                    rounds,
                    edges_add,
                    edges_rm,
                    n_sample_nodes=n_sample_nodes,
                    no_python=no_python,
                    is_undirected=is_undirected,
                    sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                step,
                np.mean(MAP[step]),
                np.std(MAP[step]),
                metrics.getPrecisionReport(
                    prec_curv[step][0],
                    len(prec_curv[step][0])
                )
            ))
    return np.mean(np.array(MAP))
def evaluateDynamic_changed_LinkPrediction(graph,
                                           embedding,
                                           rounds,
                                           edges_add,
                                           edges_rm,
                                           n_sample_nodes=None,
                                           no_python=False,
                                           is_undirected=True,
                                           sampling_scheme="u_rand"):
    """Evaluate link prediction on the nodes touched by added edges.

    The endpoints of the edges in ``edges_add[0]`` are collected, the graph
    is restricted to those nodes through ``graph_util.sample_graph``, and
    the embedding's predicted adjacency for them is scored against that
    sub-graph.

    Args:
        graph (object): Networkx graph used as ground truth.
        embedding (object): Embedding algorithm; must provide
            ``predict_next_adj(node_l)``.
        rounds (int): Unused here; kept for interface compatibility.
        edges_add (list): Container whose first element is iterated as the
            added edges; each edge is indexed at positions 0 and 1.
        edges_rm (list): Removed edges. Unused here.
        n_sample_nodes (int): Unused here.
        no_python (bool): Unused here.
        is_undirected (bool): Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        sampling_scheme (str): Unused here.

    Returns:
        tuple: ``(MAP, prec_curv)`` as produced by ``metrics.computeMAP``
        and ``metrics.computePrecisionCurve``.
    """
    # Flatten every added edge into its two endpoints, then de-duplicate.
    endpoints = [
        vertex
        for edge in edges_add[0]
        for vertex in (edge[0], edge[1])
    ]
    changed_nodes = list(np.unique(endpoints))

    test_digraph, node_l = graph_util.sample_graph(
        graph,
        len(changed_nodes),
        changed_nodes
    )

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        embedding.predict_next_adj(node_l),
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    curve_and_extra = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )
    prec_curv, _ = curve_and_extra
    return (MAP, prec_curv)
def evaluateDynamic_changed_LinkPrediction_v2(graph,
                                              embedding,
                                              rounds,
                                              edges_add,
                                              edges_rm,
                                              n_sample_nodes=None,
                                              no_python=False,
                                              is_undirected=True,
                                              sampling_scheme="u_rand"):
    """Evaluate changed-link prediction, taking removed edges into account.

    The endpoints of the edges in ``edges_add[0]`` are collected and the
    graph is restricted to them through ``graph_util.sample_graph_nodes``.
    The removed edges in ``edges_rm[0]`` are relabelled with ``node_dict``
    and grouped by source node before being handed to
    ``metrics.computePrecisionCurve``.

    Args:
        graph (object): Networkx graph used as ground truth.
        embedding (object): Embedding algorithm; must provide
            ``predict_next_adj(node_l)``.
        rounds (int): Unused here; kept for interface compatibility.
        edges_add (list): Container whose first element is iterated as the
            added edges; each edge is indexed at positions 0 and 1.
        edges_rm (list): Container whose first element is iterated as
            ``(start, end)`` pairs of removed edges.
        n_sample_nodes (int): Unused here.
        no_python (bool): Unused here.
        is_undirected (bool): Forwarded to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        sampling_scheme (str): Unused here.

    Returns:
        tuple: ``(MAP, prec_curv)`` as produced by ``metrics.computeMAP``
        and ``metrics.computePrecisionCurve``.

    Raises:
        KeyError: If an endpoint of a removed edge is not a key of the
            ``node_dict`` returned by ``graph_util.sample_graph_nodes``.
    """
    nodes = []
    for e in edges_add[0]:
        nodes.append(e[0])
        nodes.append(e[1])
    nodes = list(np.unique(nodes))

    test_digraph, node_dict = graph_util.sample_graph_nodes(graph, nodes)

    # The original referenced an undefined name `node_l` here (NameError).
    # NOTE(review): node_dict is used below as a map from original node id
    # to index in the sampled graph, so the node list is rebuilt as the
    # original ids ordered by that index - confirm against
    # graph_util.sample_graph_nodes.
    node_l = sorted(node_dict, key=node_dict.get)
    estimated_adj = embedding.predict_next_adj(node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list,
                             test_digraph,
                             node_dict,
                             edges_rm)

    # Group relabelled removed edges by source index. A dict replaces the
    # original list of len(edges_rm[0]) buckets, which raised IndexError
    # whenever a source index was >= the number of removed edges.
    removed_by_source = {}
    for st, ed in edges_rm[0]:
        src = node_dict[st]
        removed_by_source.setdefault(src, []).append((src, node_dict[ed], 1))
    # Only non-empty groups are kept, ordered by source index (this also
    # replaces the Python 2-only `xrange` filter loop).
    node_edges_rm = [removed_by_source[src]
                     for src in sorted(removed_by_source)]

    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph,
        node_edges_rm
    )
    return (MAP, prec_curv)
def expstatic_changedLP(dynamic_sbm_series,
                        graphs,
                        embedding,
                        rounds,
                        res_pre,
                        m_summ,
                        n_sample_nodes=1000,
                        train_ratio_init=0.5,
                        no_python=False,
                        is_undirected=True,
                        sampling_scheme="u_rand"):
    """Run the changed-link prediction experiment for static embeddings.

    For every step ``t`` in ``range(len(graphs) - 1)`` the embedding is
    trained on the single snapshot ``graphs[t]``, the links that changed
    between ``graphs[t]`` and ``graphs[t + 1]`` are computed, and the
    prediction is evaluated ``rounds`` times against ``graphs[t + 1]``.
    A summary line per step is appended to ``<res_pre><m_summ>.dlpsumm``.

    Args:
        dynamic_sbm_series (list): Unused here; kept for interface
            compatibility.
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``learn_embeddings`` and ``predict_next_adj``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample (cast to ``int``
            and forwarded to the evaluator).
        train_ratio_init (float): Unused here; every consecutive pair of
            snapshots is evaluated.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    MAP = [[None] * rounds for _ in range(T - 1)]
    prec_curv = [[None] * rounds for _ in range(T - 1)]

    for t in range(T - 1):
        embedding.learn_embeddings(graphs[t])
        # NOTE(review): getchangedlinks is not defined or imported in the
        # visible part of this module - confirm it is in scope.
        edges_add, edges_rm = getchangedlinks(graphs[t], graphs[t + 1])
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamic_changed_LinkPrediction(
                    graphs[t + 1],
                    embedding,
                    rounds,
                    edges_add,
                    edges_rm,
                    n_sample_nodes=n_sample_nodes,
                    no_python=no_python,
                    is_undirected=is_undirected,
                    sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                t,
                np.mean(MAP[t]),
                np.std(MAP[t]),
                metrics.getPrecisionReport(
                    prec_curv[t][0],
                    len(prec_curv[t][0])
                )
            ))
    return np.mean(np.array(MAP))
def expstaticLP(dynamic_sbm_series,
                graphs,
                embedding,
                rounds,
                res_pre,
                m_summ,
                n_sample_nodes=1000,
                train_ratio_init=0.5,
                no_python=False,
                is_undirected=True,
                sampling_scheme="u_rand"):
    """Run the link prediction experiment for static embeddings.

    For every step ``t`` in ``range(len(graphs) - 1)`` the embedding is
    trained on the single snapshot ``graphs[t]`` and evaluated ``rounds``
    times against ``graphs[t + 1]``. A summary line per step is appended
    to ``<res_pre><m_summ>.dlpsumm``.

    Args:
        dynamic_sbm_series (list): Unused here; kept for interface
            compatibility.
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``learn_embeddings`` and ``predict_next_adj``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample for evaluation
            (cast to ``int``).
        train_ratio_init (float): Unused here; every consecutive pair of
            snapshots is evaluated.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    MAP = [[None] * rounds for _ in range(T - 1)]
    prec_curv = [[None] * rounds for _ in range(T - 1)]

    for t in range(T - 1):
        embedding.learn_embeddings(graphs[t])
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction(graphs[t + 1],
                                              embedding,
                                              rounds,
                                              n_sample_nodes=n_sample_nodes,
                                              no_python=no_python,
                                              is_undirected=is_undirected,
                                              sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                t,
                np.mean(MAP[t]),
                np.std(MAP[t]),
                metrics.getPrecisionReport(
                    prec_curv[t][0],
                    len(prec_curv[t][0])
                )
            ))
    return np.mean(np.array(MAP))
def expstaticLP_TIMERS(dynamic_sbm_series,
                       graphs,
                       embedding,
                       rounds,
                       res_pre,
                       m_summ,
                       n_sample_nodes=1000,
                       train_ratio_init=0.5,
                       no_python=False,
                       is_undirected=True,
                       sampling_scheme="u_rand"):
    """Run the link prediction experiment for TIMERS embeddings.

    For every step ``t`` in ``range(len(graphs) - 1)`` the embedding's
    prediction for step ``t`` is evaluated ``rounds`` times against
    ``graphs[t + 1]``. No training call is made here; the embedding is
    queried as passed in. A summary line per step is appended to
    ``<res_pre><m_summ>.dlpsumm``.

    Args:
        dynamic_sbm_series (list): Unused here; kept for interface
            compatibility.
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``predict_next_adj(t, node_l)``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample for evaluation
            (cast to ``int``).
        train_ratio_init (float): Unused here; every consecutive pair of
            snapshots is evaluated.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    MAP = [[None] * rounds for _ in range(T - 1)]
    prec_curv = [[None] * rounds for _ in range(T - 1)]

    for t in range(T - 1):
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction_TIMERS(
                    graphs[t + 1],
                    embedding,
                    t,
                    rounds,
                    n_sample_nodes=n_sample_nodes,
                    no_python=no_python,
                    is_undirected=is_undirected,
                    sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                t,
                np.mean(MAP[t]),
                np.std(MAP[t]),
                metrics.getPrecisionReport(
                    prec_curv[t][0],
                    len(prec_curv[t][0])
                )
            ))
    return np.mean(np.array(MAP))
def expstaticLP_TRIAD(dynamic_sbm_series,
                      graphs,
                      embedding,
                      rounds,
                      res_pre,
                      m_summ,
                      n_sample_nodes=1000,
                      train_ratio_init=0.5,
                      no_python=False,
                      is_undirected=True,
                      sampling_scheme="u_rand"):
    """Run the link prediction experiment for dynamic Triad embeddings.

    For every step ``t`` in ``range(len(graphs) - 1)``,
    ``embedding.link_predict(graphs[t], t)`` is called and the prediction
    for step ``t`` is then evaluated ``rounds`` times against
    ``graphs[t + 1]``. A summary line per step is appended to
    ``<res_pre><m_summ>.dlpsumm``.

    Args:
        dynamic_sbm_series (list): Unused here; kept for interface
            compatibility.
        graphs (list): Sequence of Networkx graph snapshots.
        embedding (object): Embedding algorithm; must provide
            ``link_predict(graph, t)`` and ``predict_next_adj(t, node_l)``.
        rounds (int): Number of evaluation rounds per step. The summary
            reads the first round's precision curve, so at least one round
            is required.
        res_pre (str): Prefix of the summary file path.
        m_summ (str): Method summary string appended to ``res_pre``.
        n_sample_nodes (int): Number of nodes to sample for evaluation
            (cast to ``int``).
        train_ratio_init (float): Unused here; every consecutive pair of
            snapshots is evaluated.
        no_python (bool): Forwarded to the evaluator.
        is_undirected (bool): Forwarded to the evaluator.
        sampling_scheme (str): Node sampling scheme, forwarded to the
            evaluator.

    Returns:
        float: Mean of all MAP values over steps and rounds.
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')

    summ_path = '%s%s.dlpsumm' % (res_pre, m_summ)
    # Context managers guarantee the handle is closed even if a write or a
    # metric computation raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())

    T = len(graphs)
    MAP = [[None] * rounds for _ in range(T - 1)]
    prec_curv = [[None] * rounds for _ in range(T - 1)]

    for t in range(T - 1):
        embedding.link_predict(graphs[t], t)
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction_TIMERS(
                    graphs[t + 1],
                    embedding,
                    t,
                    rounds,
                    n_sample_nodes=n_sample_nodes,
                    no_python=no_python,
                    is_undirected=is_undirected,
                    sampling_scheme=sampling_scheme)
        # NOTE(review): the format has no separator between the step index
        # and the mean MAP ('t=%d%f'); left unchanged so the summary file
        # format stays the same.
        with open(summ_path, 'a') as summ_file:
            summ_file.write('\tt=%d%f/%f\t%s\n' % (
                t,
                np.mean(MAP[t]),
                np.std(MAP[t]),
                metrics.getPrecisionReport(
                    prec_curv[t][0],
                    len(prec_curv[t][0])
                )
            ))
    return np.mean(np.array(MAP))