+ indep. WoS citations

Python and Networks // Homework 2017-03-14 // ER model degree distribution

Problem

Generate an Erdos-Renyi network (a classical random graph) with \(N\) nodes and \(E\) links. Try several pairs of \((N,E)\) inputs. Plot the CCDF of the network's node degrees for \(N= E=10^6\).

Solution (example)

1.  Python code (er-deg-ccdf.py)

import random
from sys import argv
import matplotlib.pyplot as plt

# === read parameters: number of nodes (n) and links (e, edges) ===

_, n, e, outFileTxt, outFileImg = argv
n = int(n); e = int(e) # convert arguments to int

# === function definitions ===

# Generate Erdos-Renyi graph with N nodes and E links
def er2(n, e, node2neiSet):
    # --------------------------------------
    # !!! assuming that n ( n - 1 ) / 2 >> e 
    # --------------------------------------

    # current number of links
    eNow = 0

    # add links one-by-one until we reach the requested number of links
    while (eNow < e):

        # the two randomly selected nodes
        r1 = -1; r2 = -1;

        # repeat until the two nodes are the same or they are neighbors
        while r1 == r2 or ( r1 in node2neiSet and r2 in node2neiSet[r1] ):

            # the two nodes: two random integers
            r1 = random.randint(0,n-1)
            r2 = random.randint(0,n-1)

        # if either node has no neighbors yet, then declare its neighbor set
        for rNow in (r1,r2):
            if not rNow in node2neiSet:
                node2neiSet[rNow] = set()

        # save the 2nd node as a neighbor of the 1st node
        node2neiSet[r1].add(r2)

        # save the 1st node as a neighbor of the 2nd node
        node2neiSet[r2].add(r1)

        # change the number of links
        eNow += 1

# ---------------

# node2neiSet{node}: the set of neighbors of node <node>
# ccdf{d}: number of nodes with degree at or above d
# n: total number of nodes
def nw2ccdf(node2neiSet,ccdf,n):

    # histogram: h(d) is the number of nodes with degree d
    h = {}
    # set the number of nodes with zero degree
    # note that len(dict) is the number of dict's keys
    h[0] = n - len(node2neiSet)
    # node degree: length of the set of neighbors
    for d in map(lambda node: len(node2neiSet[node]), node2neiSet):
        # IF we have not seen this node degree yet,
        if d not in h:
            # THEN set its number to zero
            h[d] = 0
        # in all cases: remember that we have found
        # one more node with node degree d
        h[d] += 1

    # set the ccdf, starting from the bottom
    ccdf_now = 1.0
    # loop through the list of node degrees in ascending order
    for d in sorted(h):
        ccdf[d] = ccdf_now
        ccdf_now -= 1.0 * h[d] / n

    # test: after the for loop "ccdf_now" should be 0.0
    #print(ccdf_now)

# ---------------

def writePlotCcdf(ccdf,outFileTxt,outFileImg,n,e):

    # --- output txt file ---
    # open text output file
    with open(outFileTxt,"w") as f:
        # print file header
        f.write("# Node degree\n")
        f.write("#\tProbability for a node to have at least this degree\n")
        f.write("\n") # blank line to separate header from data
        # print data
        for d in sorted(ccdf):
            f.write("%d\t%g\n" % (d,ccdf[d]))

    # --- output figure ---
    # initialize the output figure
    fig, ax = plt.subplots()
    # y axis logarithmic, limits of axes
    ax.set_yscale("log")
    ax.set_xlim(left=-1,right=15)
    ax.set_ylim(bottom=5.0e-7,top=2)
    # set axis labels and plot title
    ax.set_xlabel('d: node degree')
    ax.set_ylabel('CCDF(d): probability for a node to have at least d neighbors')
    plt.title("Node degree CCDF in an ER network, N=%d, E=%d" % (n,e))
    #plt.title("Node degree CCDF in an ER network, N=E=10^6")
    # plot data points: ccdf values (y) vs ccdf keys (x)
    x = sorted( ccdf.keys( ) )
    y = [ ccdf[ k ] for k in x ]
    plt.semilogy(x,y,marker='o',mec='r',mfc='w',mew=2,linestyle='None')
    # export figure and close the figure
    fig.savefig(outFileImg)
    plt.close(fig)

# === main ===

# node2neiSet[i] is the set of neighbors of node i
# generate an ER network: return each node's neighbor set
node2neiSet = {}; er2(int(n), int(e), node2neiSet)

# ccdf{d} is the number of nodes that have a node degree equal to or above d
ccdf = {}; nw2ccdf(node2neiSet,ccdf,n)

# write and plot the cdf
writePlotCcdf(ccdf,outFileTxt,outFileImg,n,e)

2.  How to use the python code

python3 er-deg-ccdf.py 1000000 1000000 er-deg-ccdf.txt er-deg-ccdf.png

3.  Output file (er-deg-ccdf.txt)

# Node degree
#       Probability for a node to have at least this degree

0       1
1       0.864394
2       0.593959
3       0.323622
4       0.142967
5       0.052605
6       0.016578
7       0.004509
8       0.001076
9       0.000236
10      4.6e-05
11      6e-06
12      2e-06

4.  Output image (er-deg-ccdf.png)