+ indep. WoS citations

Python and Networks - Homework - 2016-02-16

Problem: Generate an Erdos-Renyi random graph with \(N=10^6\) nodes and \(E=10^6\) links, and plot the cumulated complementary distribution function (CCDF) of its node degrees in two different ways: (1) log-log, (2) lin-log.

Solutions:

1.  FIJ

1.1  Python code (er-deg-ccdf.py)

import random
from sys import argv,stdout

# read parameters: number of nodes (n) and links (e, edges)
script, n, e = argv

# convert n and e to integer
n = int(n); e = int(e)

# === function definitions ===

# Generate Erdos-Renyi graph with N nodes and E links
def er2(n, e, node2neiSet):
    # assuming that n ( n - 1 ) / 2 >> e 

    # current number of links
    eNow = 0

    # add links one-by-one until we reach the requested number of links
    while (eNow < e):

        # the two randomly selected nodes
        r1 = -1; r2 = -1;

        # repeat until the two nodes are the same or they are neighbors
        while r1 == r2 or node2neiSet.has_key(r1) and r2 in node2neiSet[r1]:

            # the two nodes: two random integers
            r1 = random.randint(0,n-1)
            r2 = random.randint(0,n-1)

        # if the either node has no neighbors yet, then declare its neighbor set
        for rNow in (r1,r2):
            if not node2neiSet.has_key(rNow):
                node2neiSet[rNow] = set()

        # save the the 2nd node as a neighbor of the 1st node
        node2neiSet[r1].add(r2)

        # save the 1st node as a neighbor of the 2nd node
        node2neiSet[r2].add(r1)

        # change the number of links
        eNow += 1

# -----------------------------------

def from_node2neiSet_compute_nodeDeg2ccdf_writeToStdout(node2neiSet,numberOfNodes):

    # histogram[d]: the number of nodes that have d neighbors
    histogram = {}

    # the number of isolated nodes (i.e., nodes with zero node degree)
    histogram[0] = numberOfNodes - len(node2neiSet.keys())

    # node numbers with non-zero node degrees
    for node in node2neiSet.keys():
        # d: the degree of the current node        
        nodeDegree = len(node2neiSet[node])
        # if we see this node degree for the first time, then set its counter
        if not histogram.has_key(nodeDegree):
            histogram[nodeDegree] = 0
        # change the counter for this node degree
        histogram[nodeDegree] += 1

    # print output header
    print "# Node degree, 0 replaced with \"-\""
    print "#\tNode degree, 0 included"
    print "#\t\tCCDF"
    print ""

    # ccdf[d]: the probability for a node to have degree above d
    # compute the ccdf from the histogram
    # print output data
    ccdfNow = 1.0
    # loop through the keys of the histogram in ascending order
    for nodeDegree in sorted(histogram):
        # print nonzero node degree or "-"
        if 0 == nodeDegree:
            stdout.write("-\t")
        else:
            stdout.write("%d\t" % nodeDegree)
        # print node degree and CCDF only if CCDF is not 0 + numerical error
        if abs(ccdfNow) > numberOfNodes**(-2.0):
            print "%d\t%.5g" % (nodeDegree,ccdfNow)
        # step to next node degree
        ccdfNow -= histogram[nodeDegree]/(1.0*numberOfNodes)

# === main ===

# node2neiSet[i] is the set of neighbors of node i
node2neiSet = {}

# generate an ER network: return each node's neighbor list
er2(n, e, node2neiSet)

# from the network compute the ccdf of node degrees, write to stdout
from_node2neiSet_compute_nodeDeg2ccdf_writeToStdout(node2neiSet, n)

1.2  How to use the python code

Time: 11 sec

fij@hal:~/tmp$ python er-deg-ccdf.py 1000000 1000000 > er-deg-ccdf.txt

1.3  Output file (er-deg-ccdf.txt)

Example, CCDF values vary a little.

# Node degree, 0 replaced with "-"
#       Node degree, 0 included
#              CCDF

-       0       1
1       1       0.86492
2       2       0.59412
3       3       0.32363
4       4       0.14276
5       5       0.05236
6       6       0.016412
7       7       0.004469
8       8       0.001046
9       9       0.000222
10      10      4.7e-05
11      11      1e-05
12      12      3e-06
13      13      1e-06

1.4  Gnuplot file (er-deg-ccdf.gnu)

# settings
se term posts lands color enh dash "Helvetica" 22
se ylab "CCDF(d) = 1 {/Symbol \055} c.d.f.(d)"
se grid
se o 'er-deg-ccdf.ps'

se lab 'Node degree (d)' at scr .55,-.02 center
se lab "ER model, N=10^6 nodes and E=10^6 links" at scr .55,.75 center
se lab "Same data, x axis linear (left) or logarithmic (right)" at scr .55,.75 center offs 0,-1.2

# open multiplot
se multip

# === left panel: semilog ===

se size .6,.7
se orig 0,0
se log y
unse log x
se xtic (0,5,10)
se ytic ("10^{{/Symbol \055}&{.}1}" 0.1, "10^{{/Symbol \055}&{.}3}" 0.001, "10^{{/Symbol \055}&{.}5}" 0.00001)

# plot
p [-1:14][5e-7:4] 'er-deg-ccdf.txt' u 2:3 w p pt 2 ps 2 lt 1 lw 4 noti

# === right panel: double log ===

se size .5,.7
se orig .5,0
se log xy
se xtic (1,2,4,8)
unse ylab
se ytic ("" 0.1, "" 0.001, "" 0.00001)

# plot
p [.7:18][5e-7:4] 'er-deg-ccdf.txt' u 1:3 w p pt 2 ps 2 lt 1 lw 4 noti

1.5  How to use the gnuplot file

gnuplot er-deg-ccdf.gnu

1.6  Final image (er-deg-ccdf.ps)

2.  KZS

2.1  Python code

#-*- coding: utf-8 -*-
import random as rnd
from sys import argv

#alkalmazom az órán megírt függvényt az erd&#337;s-rényi hálózat készítésére, de közben megszámolom az élek fokszámát is: !!
def erdosrenyi_getNodeDegrees(N, E, linkList): # a függvénynek cím szerint beadjuk az éllistát -> ahhoz ír hozzá!
  if (E > N*(N-1.0)/10): #azaz, ha nem elég ritka hálózatot akarunk csinálni
    print "er: Ne ilyen függvénnyel akarj s&#369;r&#369; hálózatot csinálni!!"
    return

  nodeDegrees = [0] * N #ennek a listának az n. eleme adja meg az n. számú csúcs fokszámát -egyel&#337;re minden eleme nulla
  while(len(linkList) < E): #amíg kevés él van, addig mindig újabbakat adunk hozzá: 1. két csúcs kiválasztása 2. összef&#369;zés 3. ellen&#337;rzés, megvannak-e már a listában
    nodePair=[]
    nodePairStr=""

    while True: #ez egy csel, végignyomjuk a while-t, csak valamikor adunk neki egy break-et
      #the two nodes:
      nodePair = sorted(rnd.sample(range(N), 2), key = int) #2 random szám
      nodePairStr = str(nodePair[0])+" "+str(nodePair[1])  #összef&#369;zés
      if not nodePairStr in linkList: #ellen&#337;rzés: akkor lépünk ki a while-ból, ha valóban új csúcspárt találtunk; az "in" ellen&#337;rizni tudja, hogy benne van-e egy elem a listában 
	break

    linkList.append(nodePairStr) #hozzátesszük az éllistához az új élt
    nodeDegrees[nodePair[0]] += 1 #megnöveljük az új él két végpontjának fokszámát
    nodeDegrees[nodePair[1]] += 1

  return nodeDegrees

#ugyanez másképpen: - erd&#337;s-rényit csinálunk, csak set-tel
def erdosrenyi2_getNodeDegrees(N, E, node2neighSet):
  if (E > N*(N-1.0)/10): #azaz, ha nem elég ritka hálózatot akarunk csinálni
    print "er: Ne ilyen függvénnyel akarj s&#369;r&#369; hálózatot csinálni!!"
    return

  nodeDegrees = [0] * N #a fokszámokat nullára inicializálom

  currentE = 0 #ez mutatja az aktuális élszámot, ezt növeljük
  while currentE < E: #adogatjuk hozzá az éleket, amíg nincs elég
    #ki kell választanunk két csúcsot, amiket összekötünk egy új éllel
    r1 = -1
    r2 = -1  #nem létez&#337; csúcsok kezdésnek..
    while r1 == r2 or (node2neighSet.has_key(r1) and r2 in node2neighSet[r1]): #azaz, ha két azonos cúscsot, vagy már létez&#337; élt választottunk, újakat kell választani...
      r1 = rnd.randint(0, N-1)
      r2 = rnd.randint(0, N-1)

    #ha már sikerült megfelel&#337; élpárt választani:
    for currentNode in (r1, r2): #ha egyik csúcsnak sem volt még szomszédja, akkor hozzá kell adnunk a set-hez az &#337; szomszédlistáját:
      if not node2neighSet.has_key(currentNode):
	node2neighSet[currentNode] = set()

    node2neighSet[r1].add(r2) #hozzáadom r2-t r1 szomszédaihoz, és fordítva
    node2neighSet[r2].add(r1)

    nodeDegrees[r1] += 1 #update-elem a fokszámaikat
    nodeDegrees[r2] += 1

    currentE += 1 #és update-elem az élek számát

  return nodeDegrees 


def getCompDist(N, nodeDegrees): #hasznos, ha meg van adva a csúcsok és élek száma, nem kell leméregetni :)
  complementearyDistFunction = [0] * (max(nodeDegrees)+1) #a fokszám max. az élek száma lehet, a minimuma nulla
  for degree in range(max(nodeDegrees)+1): #L-t&#337;l nulláig végigmegyünka  lehetséges fokszámokondf
    for nodeNmbr in range(N): #az adott fokszámra leszámolom, hogy hány csúcsnak van pont akkora, vagy nagyobb fokszáma
      if(nodeDegrees[nodeNmbr] >= degree):
	complementearyDistFunction[degree] += 1
    #normálás:
    complementearyDistFunction[degree] /= float(N)

  return complementearyDistFunction

def printCompDist(nodeDegrees, complementearyDistFunction): #ezzel a függvénnyel printelem ki "függvényszer&#369;en" a komplementer eloszlást -> ezt majd átirányítom egy fájlba
  print "#degree(Number_of_neighbours) ccdf_value"
  for i in range(max(nodeDegrees)+1):
    print str(i)+" "+str(complementearyDistFunction[i])


#mostmár csak fel kell használni ezeket a függvényeket
L = []
n2nS = {}
script, N, E = argv # a csúcsok és élek számát az argv-ben adom meg
N = int(N)
E = int(E)

D = erdosrenyi2_getNodeDegrees(N, E, n2nS)
ccdf = getCompDist(N, D)
printCompDist(D, ccdf)

2.2  Output

3.  SZE

3.1  Python code

import random

# N nodes, E links

# while the number of links is smaller than E, the program generates more link

def er(n,e,nodeset):

    if(e>n*(n-1.0)/10):
        print "er: link number too high"
        return

    eNow=0;

    while(eNow<e): 

        r1=-1; r2=-1;

        # select the two nodes
        while ((r1==r2)or((nodeset.has_key(r1)) and (r2 in nodeset[r1]))):
            r1=random.randint(0,n-1); r2=random.randint(0,n-1);

        #make new neighbour lists for the nodes if they don't have
        for j in (r1,r2):
            if not nodeset.has_key(j):
                nodeset[j]=set()

        #add new neighbours
        nodeset[r1].add(r2)
        nodeset[r2].add(r1)
        eNow+=1


N=10**6; E=10**6;
nodeset={}

er(N,E,nodeset)

distribution={}

distribution[0]=N-len(nodeset.keys())

for i in (nodeset.keys()):

    # the number of the neighbours of one node
    num=len(nodeset[i])

    # calculate it
    if not distribution.has_key(num):
        distribution[num]=0
    distribution[num]+=1


# printing
i=0
while (distribution.has_key(i)):
    print "%d\t%d" % (i, distribution[i])
    i+=1

3.2  Output

4.  GD

4.1  Python code: er2.py

import random
from sys import argv

# read parameters: number of nodes (n) and links (e, edges)
script, n, e = argv

# === function definitions ===

# Generate Erdos-Renyi graph with N nodes and E links
def er2(n, e, node2neiSet):
    # assuming that n ( n - 1 ) / 2 >> e 

    # current number of links
    eNow = 0

    # add links one-by-one until we reach the requested number of links
    while (eNow < e):

        # the two randomly selected nodes
        r1 = -1; r2 = -1;

        # repeat until the two nodes are the same or they are neighbors
        while r1 == r2 or node2neiSet.has_key(r1) and r2 in node2neiSet[r1]:

            # the two nodes: two random integers
            r1 = random.randint(0,n-1)
            r2 = random.randint(0,n-1)

        # if either node has no neighbors yet, then declare its neighbor set
        for rNow in (r1,r2):
            if not node2neiSet.has_key(rNow):
                node2neiSet[rNow] = set()

        # save the 2nd node as a neighbor of the 1st node
        node2neiSet[r1].add(r2)

        # save the 1st node as a neighbor of the 2nd node
        node2neiSet[r2].add(r1)

        # change the number of links
        eNow += 1

# === main ===

# node2neiSet[i] is the set of neighbors of node i
node2neiSet = {}

# generate an ER network: return each node's neighbor set
er2(int(n), int(e), node2neiSet)

# print out the links
for node in node2neiSet:
    for nei in node2neiSet[node]:
        print str(node) + " " + str(nei)

4.2  Python code: degccdf.py

# Calculates complementary comulative distribution function
# of the degree distribution of a network from a file that has the format
# <node1> <node2>

import sys

# Number of all the nodes is the first argument
script, n_nodes = sys.argv
n_nodes = int(n_nodes)

# Associative array of the nodes
# degree[node] = degree
degrees = {}

# Read in all the links, line by line
for line in sys.stdin:
    linedata = line[:-1].split(" ")
    node1 = linedata[0]
    node2 = linedata[1]

    # If a node is already in the degrees array,
    # number of degrees is incremented,
    # otherwise it's degree is set to one
    if degrees.has_key(node1):
        degrees[node1] += 1
    else:
        degrees[node1] = 1

    # If every link reads twice (once one way, once other way, we do not need this part.
    #if degrees.has_key(node2):
    #    degrees[node2] += 1
    #else:
    #    degrees[node2] = 1

# Calculates maximum number of degrees,
# so we can allocate the array for storing the distribution
max_degrees = 0
for k in degrees:
    if degrees[k] > max_degrees:
        max_degrees = degrees[k]

# Allocating the array for storing the distribution
distribution = []
for i in range(0, max_degrees + 1):
    distribution.append(0)

# Going through every node,
# incrementing its degree by one in the distribution array
for k in degrees:
    distribution[degrees[k]] += 1.0 / n_nodes

distribution[0] = 1.0 * (n_nodes - len(degrees)) / n_nodes
#print distribution[0]

# Printing out the degree distribution (for debug)
#for i in range(0, len(distribution)):
#    print(str(i) + " " + str(distribution[i]))

# Allocating the array for storing the ccdf
ccdf = []
ccdf_currently = 1
for i in range(0, max_degrees + 1):
    ccdf.append(ccdf_currently)
    ccdf_currently -= distribution[i]

# Printing out the ccdf
for i in range(0, len(ccdf)):
   print(str(i) + " " + str(ccdf[i]))

4.3  How to use the code

The output file is ccdf.txt:

python er2.py <n> <e> | python degccdf.py <n> > ccdf.txt

4.4  Output

On the left the two tics (1 and 10) on the x axis do not make it clear that this axis is scaled logarithmically. One or two more tics would be necessary on this axis.

5.  PD

5.1  Python code

import random
from sys import argv,stdout

# read parameters: number of nodes (n) and links (e, edges)
script, n, e = argv
# === function definitions ===
# Generate Erdos-Renyi graph with N nodes and E links

def er2(n, e, node2neiSet):
    # assuming that n ( n - 1 ) / 2 >> e
    # current number of links
    eNow = 0
    # add links one-by-one until we reach the requested number of links
    while (eNow < e):
        # the two randomly selected nodes
        r1 = -1; r2 = -1;
        # repeat until the two nodes are the same or they are neighbors
        while r1 == r2 or node2neiSet.has_key(r1) and r2 in node2neiSet[r1]:
            # the two nodes: two random integers
              r1 = random.randint(0,n-1)
              r2 = random.randint(0,n-1)
        # if either node has no neighbors yet, then declare its neighbor set
        for rNow in (r1,r2):
            if not node2neiSet.has_key(rNow):
                   node2neiSet[rNow] = set()
        # save the 2nd node as a neighbor of the 1st node
        node2neiSet[r1].add(r2)
        # save the 1st node as a neighbor of the 2nd node
        node2neiSet[r2].add(r1)
        # change the number of links
        eNow += 1

# === main ===

m=open("er2_dpapp.dat","w")
m.write("#i\tCCDF\n")
# node2neiSet[i] is the set of neighbors of node i
node2neiSet = {}
# generate an ER network: return each node's neighbor set

er2(int(n), int(e), node2neiSet)

deglist={} # fokszamok 

for i in range(int(n)):
        #nodeDegree=len(node2neiSet[i])
        if  node2neiSet.has_key(i):
                deglist[i]=len(node2neiSet[i]) # megadja az adott szambol hany csucs van
        else:
                deglist[i]=0
#print deglist          
# a kovetkezo listanak azt az elemet fogom megnovelni amekkora a dict-ben a fokszam
#~~~~~~~~~~~~~~~~~~fokszameloszlas~~~~~~~~~~~~~~~~~~~~~~~~~  
dist=[0]*int(n)
for i in range(int(n)):
    dist[deglist[i]]+=1
#~~~~~~~~~~ CCDF ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    
ccdf=[0]*int(n)
s=sum(dist)
ccdf[0]=s
N=int(n)
for i in range(1,int(n)):
    ccdf[i]=s-dist[i-1]
    sprint=s/(1.0*N)
    m.write("%d\t%.5g\n" %(i,sprint))    
    s=ccdf[i]

5.2  Output

The first symbol is on the frame. It would look better inside.

6.  BB

6.1  Python code

import random
import numpy as np
import matplotlib.pyplot as plt

# Erdos-Renyi graph generator
# Works good if e << n^2
# n nodes, e, degree
def er(n, e, degreeList):
        if (e > n * (n - 1.0) / 10):
                print "er: link number too high"
                return

        linkList = []

        while (len(linkList) < e):
                nodePair = []
                nodePairStr = ""
                while True:
                        # the two nodes
                        nodePair = sorted(random.sample(range(n), 2), key = int)
                        nodePairStr = str(nodePair[0]) + " " + str(nodePair[1])

                        # exit "while" if this is a new link
                        if not nodePairStr in linkList:
                            break

                linkList.append(nodePairStr)
                degreeList[nodePair[0]]+= 1
                degreeList[nodePair[1]]+= 1


n = 100000
e = 100000
degreeList = [0]*n
er(n, e, degreeList)
#print degreeList

# storing the density distribution, in order to calculate the cummulative density function
density, binEdges =  np.histogram(degreeList)
density = density.tolist()
#removing the unnecessary
density[:] = [x for x in  density if x != 0]
#normalizing the density function
norm = sum(density)
unity_density = [1.0*x / norm for x in density]
#calculating the cumulative and the complementary distribution function
cdf = np.cumsum(unity_density)
compCdf = [1 - x for x in cdf]
#plt.loglog(compCdf)
plt.plot(compCdf)
plt.xscale('log')
#plt.yscale('log')
plt.xlabel("log (k)")
plt.ylabel("P(X > k)")
plt.title("Cumulated complementary distribution function of node degrees")
plt.show()

6.2  Output (image)

For these plots the layout in the above script was slightly modified.

7.  BGS

7.1  Python code

import random
from sys import argv,stdout

# read parameters: number of nodes (n) and links (e, edges)
script, n, e = argv
# === function definitions ===
# Generate Erdos-Renyi graph with N nodes and E links
e=int(e)
n=int(n)

def er2(n, e, node2neiSet):
    # assuming that n ( n - 1 ) / 2 >> e
    # current number of links
    eNow = 0
    # add links one-by-one until we reach the requested number of links
    while (eNow < e):
        # the two randomly selected nodes
        r1 = -1; r2 = -1;
        # repeat until the two nodes are the same or they are neighbors
        while r1 == r2 or node2neiSet.has_key(r1) and r2 in node2neiSet[r1]:
            # the two nodes: two random integers
              r1 = random.randint(0,n-1)
              r2 = random.randint(0,n-1)
        # if either node has no neighbors yet, then declare its neighbor set
        for rNow in (r1,r2):
            if not node2neiSet.has_key(rNow):
                   node2neiSet[rNow] = set()
        # save the 2nd node as a neighbor of the 1st node
        node2neiSet[r1].add(r2)
        # save the 1st node as a neighbor of the 2nd node
        node2neiSet[r2].add(r1)
        # change the number of linksprint node2neiSet
        #print "szomszedok:"
        #print node2neiSet
        eNow += 1

def distribution(node2neiSet,n,m):
   histogram={} # fokszamok 
   histogram[0] = n - len(node2neiSet.keys())

   for node in node2neiSet.keys():
      nodeDegree=len(node2neiSet[node])
      if not histogram.has_key(nodeDegree):
	     histogram[nodeDegree] = 0
      histogram[nodeDegree]+=1
   #print "hisztogram:"
   #max=histogram[0]
   #for i in histogram.keys():
    #  if not histogram.has_key(i):
     #    max=max
      #else:   
	   #  if ( max < histogram[i]):
	    #   max=histogram[i]

   #print max(histogram)   
   #print histogram
   dist=[0.0]*(max(histogram)+1)  # legfeljebb ennyi elembol allhat (legnagyobb fokszam) 
   ccdf=[0.0]*(max(histogram)+1)
   j=0
   for i in histogram.keys():
      if not histogram.has_key(i):   # hogy ne legyen Key_error es kezelni tudjuk a hianyzo elemeket 
	     dist[i]=0
      else:
         dist[i]=histogram[i]*1.0    # ccdf definicio: bizonyos ertknel nagyobb vagy egyenlo fokszamok szamlalasa 
   for j in range(len(dist)):
      for k in range(j,len(dist)):
         ccdf[j]+=dist[k]
      ccdf[j]=ccdf[j]*1.0/n

   #for i in histogram.keys():
    #  if not histogram.has_key(i):
	 #    histogram[i]+=0
      #else:
       #  dist[histogram[i]]+=histogram[i]    
   for j in range(len(dist)):
      m.write("%d\t %lf\r\n" %(j,ccdf[j]))
   #print dist
   #print ccdf
# === main ===

m=open("samu.dat","w")
m.write("#k \t ccdf(k)\r\n")
# node2neiSet[i] is the set of neighbors of node i
node2neiSet = {}

er2(n, e, node2neiSet)
distribution(node2neiSet,n,m) 

7.2  Output (image)