Example Code for TIMERS

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
disp_avlbl = True
if os.name == 'posix' and 'DISPLAY' not in os.environ:
    disp_avlbl = False
    import matplotlib
    matplotlib.use('Agg')

import matplotlib.pyplot as plt
import networkx as nx
import operator
from time import time
from argparse import ArgumentParser

from dynamicgem.embedding.TIMERS import TIMERS
from dynamicgem.utils import graph_util, plot_util, dataprep_util
from dynamicgem.evaluation import visualize_embedding as viz
from dynamicgem.utils.sdne_utils import *
from dynamicgem.evaluation import evaluate_graph_reconstruction as gr
from dynamicgem.evaluation import evaluate_link_prediction as lp
from dynamicgem.graph_generation import dynamic_SBM_graph


# Embedding variants evaluated for every dataset; each name is passed to
# ``embedding.get_embedding`` and is also used as a sub-directory name under
# both the scratch directory and the result directory.
METHODS = ('incrementalSVD', 'rerunSVD', 'optimalSVD')


def build_parser():
    """Build the command-line parser for this example script.

    Returns:
        An ``ArgumentParser`` exposing the data type, time length, node
        migration count, embedding dimension, theta threshold, result
        directory, sample count and experiment selector.
    """
    parser = ArgumentParser(description='Learns static node embeddings')
    parser.add_argument('-t', '--testDataType',
                        default='sbm_cd',
                        type=str,
                        help='Type of data to test the code')
    parser.add_argument('-l', '--timelength',
                        default=5,
                        type=int,
                        help='Number of time series graph to generate')
    parser.add_argument('-nm', '--nodemigration',
                        default=5,
                        type=int,
                        help='number of nodes to migrate')
    # NOTE(review): the dimension is parsed as float on the command line while
    # the default stays the int 16; result tags always use int(...) of it.
    parser.add_argument('-emb', '--embeddimension',
                        default=16,
                        type=float,
                        help='embedding dimension')
    parser.add_argument('-theta', '--theta',
                        default=0.5,  # 0.17
                        type=float,
                        help='a threshold for re-run SVD')
    parser.add_argument('-rdir', '--resultdir',
                        default='./results_link_all',  # 0.17
                        type=str,
                        help='directory for storing results')
    parser.add_argument('-sm', '--samples',
                        default=10,
                        type=int,
                        help='samples for test data')
    parser.add_argument('-exp', '--exp',
                        default='lp',
                        type=str,
                        help='experiments (lp, emb)')
    return parser


def ensure_dir(path):
    """Create ``path`` together with any missing parents if it is absent.

    Args:
        path: directory path to create.

    Returns:
        ``path`` unchanged, so the call can be used inline.
    """
    if not os.path.exists(path):
        # makedirs (not mkdir) so that nested result directories also work.
        os.makedirs(path)
    return path


def top_central_nodes(centrality, count):
    """Return the ``count`` keys of ``centrality`` with the highest scores.

    Args:
        centrality: mapping of node -> score.
        count: number of nodes to keep; values <= 0 yield an empty list.

    Returns:
        List of nodes ordered by decreasing score (ties keep mapping order).

    Raises:
        ValueError: if ``count`` exceeds the number of available nodes.
    """
    if count > len(centrality):
        raise ValueError('requested %d sample nodes but only %d are available'
                         % (count, len(centrality)))
    ranked = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True)
    return [node for node, _ in ranked[:max(count, 0)]]


def read_monthly_graphs(directory, count):
    """Read ``month_<i>_graph.gpickle`` for i = 1..count from ``directory``."""
    return [nx.read_gpickle(directory + '/month_' + str(i + 1) + '_graph.gpickle')
            for i in range(count)]


def load_cached_graphs(cache_dir, build):
    """Load graphs from a pickle cache, building the cache when it is unusable.

    The cache directory is only created after ``build`` has succeeded, and an
    empty directory is treated as missing, so an interrupted first run does
    not leave behind a cache that later runs would read as "zero graphs".

    Args:
        cache_dir: directory holding one pickle per snapshot, named 0..n-1.
        build: zero-argument callable returning ``(graphs, length)``.

    Returns:
        Tuple ``(graphs, length)``.
    """
    if os.path.isdir(cache_dir) and os.listdir(cache_dir):
        length = len(os.listdir(cache_dir))
        graphs = [nx.read_gpickle(cache_dir + '/' + str(i)) for i in range(length)]
        return graphs, length

    graphs, length = build()
    ensure_dir(cache_dir)
    for i in range(length):
        nx.write_gpickle(graphs[i], cache_dir + '/' + str(i))
    return graphs, length


def build_hep_graphs():
    """Read the raw hep-th snapshots and pad them to a common node set.

    Returns:
        Tuple ``(graphs, length)`` where every graph contains at least the
        nodes ``0..N-1``, N being the node count of the last snapshot.
    """
    source_dir = './test_data/hep/hep-th'
    length = len([name for name in os.listdir(source_dir) if '.gpickle' in name])
    graphs = read_monthly_graphs(source_dir, length)
    total_nodes = graphs[-1].number_of_nodes()
    for graph in graphs:
        # add_nodes_from leaves already-present nodes untouched.
        graph.add_nodes_from(range(total_nodes))
    return graphs, length


def sampled_tag(args, sample):
    """Return the result-file tag used by the node-sampled datasets."""
    return ('l' + str(args.timelength) + '_emb' + str(int(args.embeddimension))
            + '_samples' + str(sample))


def run_experiment(args, graphs, length, tag, dynamic_series=None, **lp_kwargs):
    """Prepare the TIMERS input, build the embedding and run the experiment.

    Args:
        args: parsed command-line arguments.
        graphs: list of graph snapshots handed to TIMERS and to the evaluation.
        length: number of snapshots reported to ``prep_input_TIMERS``/``TIMERS``.
        tag: suffix identifying this configuration in the result files.
        dynamic_series: first argument of ``lp.expstaticLP_TIMERS`` (the SBM
            series for synthetic data, ``None`` otherwise).
        **lp_kwargs: extra keyword arguments forwarded verbatim to
            ``lp.expstaticLP_TIMERS`` (e.g. ``n_sample_nodes``).
    """
    datafile = dataprep_util.prep_input_TIMERS(graphs, length, args.testDataType)

    embedding = TIMERS(K=args.embeddimension,
                       Theta=args.theta,
                       datafile=datafile,
                       length=length,
                       nodemigration=args.nodemigration,
                       resultdir=args.resultdir,
                       datatype=args.testDataType
                       )

    # Scratch directories, one per method; created regardless of --exp.
    outdir_tmp = './output' + '/' + args.testDataType
    for method in METHODS:
        ensure_dir(outdir_tmp + '/' + method)

    if args.exp == 'emb':
        print('plotting embedding not implemented!')

    if args.exp == 'lp':
        embedding.learn_embedding()
        outdir = ensure_dir(args.resultdir + '/' + args.testDataType)

        for method in METHODS:
            embedding.get_embedding(outdir_tmp, method)
            method_dir = ensure_dir(outdir + '/' + method)
            lp.expstaticLP_TIMERS(dynamic_series,
                                  graphs,
                                  embedding,
                                  1,
                                  method_dir + '/',
                                  tag,
                                  **lp_kwargs
                                  )


def run_sampled(args, graphs, length, reference_graph):
    """Restrict snapshots to the most central nodes, then run the experiment.

    The ``args.samples`` nodes with the highest degree centrality in
    ``reference_graph`` are selected, the first ``length`` snapshots are
    replaced by ``graph_util.sample_graph_nodes`` of that node list, and only
    the last ``args.timelength`` snapshots are evaluated.
    """
    sample = args.samples
    node_l = top_central_nodes(nx.degree_centrality(reference_graph), sample)
    for i in range(length):
        graphs[i] = graph_util.sample_graph_nodes(graphs[i], node_l)

    run_experiment(args,
                   graphs[-args.timelength:],
                   args.timelength,
                   sampled_tag(args, sample),
                   n_sample_nodes=sample)


def run_sbm_cd(args):
    """Run the experiment on a synthetic SBM community-diminish series."""
    node_num = 100
    community_num = 2
    length = args.timelength
    dynamic_sbm_series = dynamic_SBM_graph.get_community_diminish_series_v2(
        node_num, community_num, length, 1, args.nodemigration)
    graphs = [g[0] for g in dynamic_sbm_series]
    tag = ('nm' + str(args.nodemigration) + '_l' + str(length)
           + '_emb' + str(int(args.embeddimension)))
    run_experiment(args, graphs, length, tag, dynamic_series=dynamic_sbm_series)


def run_academic(args):
    """Run the experiment on the academic dataset (pickle-cached)."""
    print("datatype:", args.testDataType)
    graphs, length = load_cached_graphs(
        './test_data/academic/pickle',
        lambda: dataprep_util.get_graph_academic('./test_data/academic/adjlist'))
    # graph 29 in academia has highest number of edges
    run_sampled(args, graphs, length, graphs[29])


def run_hep(args):
    """Run the experiment on the hep-th dataset (pickle-cached)."""
    print("datatype:", args.testDataType)
    graphs, length = load_cached_graphs('./test_data/hep/pickle', build_hep_graphs)
    # The last snapshot is the reference for picking the sampled nodes.
    run_sampled(args, graphs, length, graphs[-1])


def run_as(args):
    """Run the experiment on the AS-733 dataset."""
    print("datatype:", args.testDataType)
    source_dir = './test_data/AS/as-733'
    length = len([name for name in os.listdir(source_dir) if '.gpickle' in name])
    graphs = read_monthly_graphs(source_dir, length)
    # The last snapshot is the reference for picking the sampled nodes.
    run_sampled(args, graphs, length, graphs[-1])


def run_enron(args):
    """Run the experiment on the enron dataset without node sampling."""
    print("datatype:", args.testDataType)
    source_dir = './test_data/enron'
    length = len([name for name in os.listdir(source_dir) if 'month' in name])
    graphsall = read_monthly_graphs(source_dir, length)

    # Every node is kept; the node count of the first snapshot is only used
    # for the result tag and as n_sample_nodes of the evaluation.
    sample = graphsall[0].number_of_nodes()
    run_experiment(args,
                   graphsall[-args.timelength:],
                   args.timelength,
                   sampled_tag(args, sample),
                   n_sample_nodes=sample)


# Dispatch table: --testDataType value -> function running that dataset.
RUNNERS = {
    'sbm_cd': run_sbm_cd,
    'academic': run_academic,
    'hep': run_hep,
    'AS': run_as,
    'enron': run_enron,
}


def main():
    """Parse the command line and run the experiment for the chosen dataset."""
    parser = build_parser()
    args = parser.parse_args()

    runner = RUNNERS.get(args.testDataType)
    if runner is None:
        # Previously an unknown data type made the script exit silently.
        parser.error('unsupported test data type %r (expected one of: %s)'
                     % (args.testDataType, ', '.join(sorted(RUNNERS))))
    runner(args)


if __name__ == '__main__':
    main()

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery