Source code for dynamicgem.evaluation.evaluate_link_prediction

try:
    import cPickle as pickle
except:
    import pickle
from dynamicgem.evaluation import metrics as metrics
from dynamicgem.utils import evaluation_util
from dynamicgem.utils import graph_util
import numpy as np
import networkx as nx
import pdb
import sys

sys.path.insert(0, './')
from dynamicgem.utils import embed_util


[docs]def evaluateDynamicLinkPrediction(graph,
                                  embedding,
                                  rounds,
                                  n_sample_nodes=None,
                                  no_python=False,
                                  is_undirected=True,
                                  sampling_scheme="u_rand"):
    """Function to evaluate Dynamic Link Prediction
           
           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding
               n_sample_nodes (list): sampled nodes
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme (str): Sampling scheme to be used. 

            Returns:
                ndarray: MAP, precision curve
    """
    node_l = None
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            test_digraph, node_l = graph_util.sample_graph(
                graph,
                n_sample_nodes
            )
        else:
            test_digraph, node_l = graph_util.sample_graph_rw_int(
                graph,
                n_sample_nodes
            )
    estimated_adj = embedding.predict_next_adj(node_l)
    print(len(estimated_adj), np.shape(estimated_adj))

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )
    print(len(predicted_edge_list), np.shape(predicted_edge_list), len(test_digraph.edges()),
          np.shape(test_digraph.edges()))
    # pdb.set_trace()

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )
    return (MAP, prec_curv)


[docs]def evaluateDynamicLinkPrediction_TIMERS(graph,
                                         embedding, t,
                                         rounds,
                                         n_sample_nodes=None,
                                         no_python=False,
                                         is_undirected=True,
                                         sampling_scheme="u_rand"):
    """Function to evaluate Dynamic Link Prediction for TIMERS
           
           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding
               t(int): sequence of the graph
               n_sample_nodes (list): sampled nodes
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme (str): Sampling scheme to be used. 

            Returns:
                ndarray: MAP, precision curve
    """
    node_l = None
    if n_sample_nodes:
        if sampling_scheme == "u_rand":
            test_digraph, node_l = graph_util.sample_graph(
                graph,
                n_sample_nodes
            )
        else:
            test_digraph, node_l = graph_util.sample_graph_rw_int(
                graph,
                n_sample_nodes
            )
    estimated_adj = embedding.predict_next_adj(t, node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )
    return (MAP, prec_curv)


[docs]def expLP(graphs,
          embedding,
          rounds,
          res_pre,
          m_summ,
          n_sample_nodes=1000,
          train_ratio_init=0.5,
          no_python=False,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Function to evaluate link prediction
           
           Attributes:
               digraph (Object): Networkx Graph Object
               graph_embedding (object): Algorithm for learning graph embedding
               X_stat (ndarray): Embedding values of the graph.
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    T_min = int(train_ratio_init * T)
    MAP = [None] * (T - T_min)
    prec_curv = [None] * (T - T_min)
    for i in range(T - T_min):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T_min, T):
        embedding.learn_embeddings(graphs[:t])
        for r_id in range(rounds):
            MAP[t - T_min][r_id], prec_curv[t - T_min][r_id] = \
                evaluateDynamicLinkPrediction(graphs[t], embedding,
                                              rounds,
                                              n_sample_nodes=n_sample_nodes,
                                              no_python=no_python,
                                              is_undirected=is_undirected,
                                              sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t - T_min,
            np.mean(MAP[t - T_min]),
            np.std(MAP[t - T_min]),
            metrics.getPrecisionReport(
                prec_curv[t - T_min][0],
                len(prec_curv[t - T_min][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))


[docs]def exp_changedLP(graphs,
                  embedding,
                  rounds,
                  res_pre,
                  m_summ,
                  n_sample_nodes=1000,
                  train_ratio_init=0.5,
                  no_python=False,
                  is_undirected=True,
                  sampling_scheme="u_rand"):
    """Function to evaluate only changed link prediction
           
           Attributes:
               digraph (Object): Networkx Graph Object
               graph_embedding (object): Algorithm for learning graph embedding
               X_stat (ndarray): Embedding values of the graph.
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    T_min = int(train_ratio_init * T)
    MAP = [None] * (T - T_min)
    prec_curv = [None] * (T - T_min)
    for i in range(T - T_min):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T_min, T):
        edges_add, edges_rm = getchangedlinks(graphs[t - 1], graphs[t])
        embedding.learn_embeddings(graphs[:t])
        for r_id in range(rounds):
            MAP[t - T_min][r_id], prec_curv[t - T_min][r_id] = \
                evaluateDynamic_changed_LinkPrediction(graphs[t], embedding,
                                                       rounds,
                                                       edges_add, edges_rm,
                                                       # dynamic_sbm_series[t][3],
                                                       n_sample_nodes=n_sample_nodes,
                                                       no_python=no_python,
                                                       is_undirected=is_undirected,
                                                       sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t - T_min,
            np.mean(MAP[t - T_min]),
            np.std(MAP[t - T_min]),
            metrics.getPrecisionReport(
                prec_curv[t - T_min][0],
                len(prec_curv[t - T_min][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))


[docs]def evaluateDynamic_changed_LinkPrediction(graph,
                                           embedding,
                                           rounds,
                                           edges_add, edges_rm,
                                           n_sample_nodes=None,
                                           no_python=False,
                                           is_undirected=True,
                                           sampling_scheme="u_rand"):
    """Function to evaluate dynamic changed link prediction
           
           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding.
               edges_add (list): list of edges to be added.
               edges_rm (list): list of edges to be removed.
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               m_summ (str): summary to be used to save the result.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    nodes = []
    for e in edges_add[0]:
        nodes.append(e[0])
        nodes.append(e[1])

    # for e in edges_rm[0]:
    #   nodes.append(e[0])
    #   nodes.append(e[1])  

    nodes = list(np.unique(nodes))
    # pdb.set_trace()

    test_digraph, node_l = graph_util.sample_graph(graph, len(nodes), nodes)
    estimated_adj = embedding.predict_next_adj(node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph
    )

    return (MAP, prec_curv)


[docs]def evaluateDynamic_changed_LinkPrediction_v2(graph,
                                              embedding,
                                              rounds,
                                              edges_add, edges_rm,
                                              n_sample_nodes=None,
                                              no_python=False,
                                              is_undirected=True,
                                              sampling_scheme="u_rand"):
    """Function to evaluate dynamic changed link prediction
           
           Attributes:
               graph (Object): Networkx Graph Object
               embedding (object): Algorithm for learning graph embedding.
               edges_add (list): list of edges to be added.
               edges_rm (list): list of edges to be removed.
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               m_summ (str): summary to be used to save the result.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    nodes = []
    for e in edges_add[0]:
        nodes.append(e[0])
        nodes.append(e[1])

    # for e in edges_rm[0]:
    #   nodes.append(e[0])
    #   nodes.append(e[1])  

    nodes = list(np.unique(nodes))
    # pdb.set_trace()

    test_digraph, node_dict = graph_util.sample_graph_nodes(graph, nodes)
    estimated_adj = embedding.predict_next_adj(node_l)

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=None
    )

    MAP = metrics.computeMAP(predicted_edge_list, test_digraph, node_dict, edges_rm)

    node_edges_rm = []
    for i in range(len(edges_rm[0])):
        node_edges_rm.append([])
    for st, ed in edges_rm[0]:
        node_edges_rm[node_dict[st]].append((node_dict[st], node_dict[ed], 1))
    node_edges_rm = [node_edges_rm[i] for i in xrange(len(node_edges_rm)) if len(node_edges_rm[i]) > 0]

    # pdb.set_trace()
    prec_curv, _ = metrics.computePrecisionCurve(
        predicted_edge_list,
        test_digraph, node_edges_rm
    )
    # pdb.set_trace()

    return (MAP, prec_curv)


[docs]def getchangedlinks(G, Gnew):
    """Functionto get all the changed links"""
    # get all the changed links
    edges_add = []
    Gdiff = nx.difference(Gnew, G)
    edges_add.append(Gdiff.edges())

    edges_rm = []
    Gdiff = nx.difference(G, Gnew)
    edges_rm.append(Gdiff.edges())

    # pdb.set_trace()

    # for e in edges:
    #   nodes.append(e[0])
    #   nodes.append(e[1])

    return edges_add, edges_rm


[docs]def expstatic_changedLP(dynamic_sbm_series,
                        graphs,
                        embedding,
                        rounds,
                        res_pre,
                        m_summ,
                        n_sample_nodes=1000,
                        train_ratio_init=0.5,
                        no_python=False,
                        is_undirected=True,
                        sampling_scheme="u_rand"):
    """Function to evaluate statically changed link prediction
           
           Attributes:
               dynamic_sbm_series (list): list of Networkx Graph Object
               gaphs (object): Networkx graphs
               embedding (object): Algorithm for learning graph embedding
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    # T_min = int(train_ratio_init * T)
    MAP = [None] * (T - 1)
    prec_curv = [None] * (T - 1)
    for i in range(T - 1):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T - 1):
        embedding.learn_embeddings(graphs[t])
        edges_add, edges_rm = getchangedlinks(graphs[t], graphs[t + 1])
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamic_changed_LinkPrediction(graphs[t + 1], embedding,
                                                       rounds,
                                                       edges_add, edges_rm,
                                                       # dynamic_sbm_series[t][3],
                                                       n_sample_nodes=n_sample_nodes,
                                                       no_python=no_python,
                                                       is_undirected=is_undirected,
                                                       sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t,
            np.mean(MAP[t]),
            np.std(MAP[t]),
            metrics.getPrecisionReport(
                prec_curv[t][0],
                len(prec_curv[t][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))


[docs]def expstaticLP(dynamic_sbm_series,
                graphs,
                embedding,
                rounds,
                res_pre,
                m_summ,
                n_sample_nodes=1000,
                train_ratio_init=0.5,
                no_python=False,
                is_undirected=True,
                sampling_scheme="u_rand"):
    """Function to evaluate statically changed link prediction
           
           Attributes:
               dynamic_sbm_series (list): list of Networkx Graph Object
               gaphs (object): Networkx graphs
               embedding (object): Algorithm for learning graph embedding
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    # T_min = int(train_ratio_init * T)
    MAP = [None] * (T - 1)
    prec_curv = [None] * (T - 1)
    for i in range(T - 1):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T - 1):
        embedding.learn_embeddings(graphs[t])
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction(graphs[t + 1], embedding,
                                              rounds,
                                              # dynamic_sbm_series[t][3],
                                              n_sample_nodes=n_sample_nodes,
                                              no_python=no_python,
                                              is_undirected=is_undirected,
                                              sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t,
            np.mean(MAP[t]),
            np.std(MAP[t]),
            metrics.getPrecisionReport(
                prec_curv[t][0],
                len(prec_curv[t][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))


[docs]def expstaticLP_TIMERS(dynamic_sbm_series,
                       graphs,
                       embedding,
                       rounds,
                       res_pre,
                       m_summ,
                       n_sample_nodes=1000,
                       train_ratio_init=0.5,
                       no_python=False,
                       is_undirected=True,
                       sampling_scheme="u_rand"):
    """Function to evaluate statically changed link prediction for TIMERS
           
           Attributes:
               dynamic_sbm_series (list): list of Networkx Graph Object
               gaphs (object): Networkx graphs
               embedding (object): Algorithm for learning graph embedding
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    # T_min = int(train_ratio_init * T)
    MAP = [None] * (T - 1)
    prec_curv = [None] * (T - 1)
    for i in range(T - 1):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T - 1):
        # embedding.learn_embeddings(t)
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction_TIMERS(graphs[t + 1], embedding, t,
                                                     rounds,
                                                     # dynamic_sbm_series[t][3],
                                                     n_sample_nodes=n_sample_nodes,
                                                     no_python=no_python,
                                                     is_undirected=is_undirected,
                                                     sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t,
            np.mean(MAP[t]),
            np.std(MAP[t]),
            metrics.getPrecisionReport(
                prec_curv[t][0],
                len(prec_curv[t][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))


[docs]def expstaticLP_TRIAD(dynamic_sbm_series,
                      graphs,
                      embedding,
                      rounds,
                      res_pre,
                      m_summ,
                      n_sample_nodes=1000,
                      train_ratio_init=0.5,
                      no_python=False,
                      is_undirected=True,
                      sampling_scheme="u_rand"):
    
    """Function to evaluate statically changed link prediction for dynamic Triad
           
           Attributes:
               dynamic_sbm_series (list): list of Networkx Graph Object
               gaphs (object): Networkx graphs
               embedding (object): Algorithm for learning graph embedding
               n_sampled_nodes (int): List of sampled nodes.
               train_ratio_init (float): sample to be used for training and testing.
               rounds (int): Number of times to run the experiment
               res_pre (str): prefix to be used to store the result. 
               m_summ (str): summary to be used to save the result.
               file_suffix (str): Suffix for file name.
               is_undirected (bool): Flag to denote if the graph is directed.
               sampling_scheme(str): sampling scheme for selecting the nodes.

            Returns:
                ndarray: Mean Average precision
    """
    n_sample_nodes = int(n_sample_nodes)
    print('\tDynamic Link Prediction')
    summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'w')
    summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
    summ_file.close()
    T = len(graphs)
    # T_min = int(train_ratio_init * T)
    MAP = [None] * (T - 1)
    prec_curv = [None] * (T - 1)
    for i in range(T - 1):
        MAP[i] = [None] * rounds
        prec_curv[i] = [None] * rounds
    for t in range(T - 1):

        embedding.link_predict(graphs[t], t)
        for r_id in range(rounds):
            MAP[t][r_id], prec_curv[t][r_id] = \
                evaluateDynamicLinkPrediction_TIMERS(graphs[t + 1], embedding, t,
                                                     rounds,
                                                     # dynamic_sbm_series[t][3],
                                                     n_sample_nodes=n_sample_nodes,
                                                     no_python=no_python,
                                                     is_undirected=is_undirected,
                                                     sampling_scheme=sampling_scheme)
        summ_file = open('%s%s.dlpsumm' % (res_pre, m_summ), 'a')
        summ_file.write('\tt=%d%f/%f\t%s\n' % (
            t,
            np.mean(MAP[t]),
            np.std(MAP[t]),
            metrics.getPrecisionReport(
                prec_curv[t][0],
                len(prec_curv[t][0])
            )
        ))
        summ_file.close()
    # pickle.dump([MAP, prec_curv],
    #             open('%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme),
    #                  'wb'))
    return np.mean(np.array(MAP))