+ indep. WoS citations

Python and Networks // 2017-05-09 // Land value vs Net value of Residential properties

Problem

For the residential properties (the value of characters 56 to 58 is "2") of Atlantic County in New Jersey, compare the land value (characters 421 to 429) to the net value (characters 439 to 447) on a scatter plot. Note that due to the large number of values it is not practical to plot all land_value - net_value pairs. Plot the results for 2010 and 2016 together.

Solution (example)

1.  Python code (scatter-bins.py)

import sys
import matplotlib.pyplot as plt
import math

# === read command-line arguments ===

# Expected: <script> <2010 data file> <2016 data file> <bin ratio> <output image>
# The unpacking raises ValueError unless exactly four arguments follow the script name.
script_name, inFile_2010, inFile_2016, ratio_arg, outFileImg = sys.argv
# the bin ratio arrives as text; the rest of the script needs it as a number
r = float(ratio_arg)

# === Function definitions ===

def read_values(inFile,land_values,net_values):
    """Append the land and net values of residential records to two lists.

    The file is read line by line as fixed-width text. Using Python slice
    positions:
      line[56:58]   -> property class; a record is kept only if this is "2"
                       after removing leading zeros and trailing spaces/tabs
      line[421:429] -> land value
      line[439:447] -> net value

    A record is kept only if both values are non-empty after removing their
    leading zeros, so all-zero and missing fields are skipped.

    inFile: path of the data file
    land_values, net_values: lists that are extended in place with integers

    Returns None.
    """
    with open(inFile, "r") as records:
        for record in records:
            # property class without leading zeros and trailing spaces / tabs
            class_code = record[56:58].lstrip("0").rstrip(" ").rstrip("\t")
            # anything other than class 2 is not residential: skip it
            if class_code != "2":
                continue

            # both amounts, with leading zeros removed
            land_digits = record[421:429].lstrip("0")
            net_digits = record[439:447].lstrip("0")
            # an empty string means the field was all zeros or absent: skip it
            if not land_digits or not net_digits:
                continue

            # save both values as integers
            land_values.append(int(land_digits))
            net_values.append(int(net_digits))

# -----------------------------------

def bin_avg(land_val,net_val,r,x,y,n):
    """Average net_val over logarithmic bins of land_val.

    The (i)th bin is [ r^i, r^(i+1) ). The results are written into the
    dictionaries supplied by the caller, all keyed by the bin index i:
      x[i]: geometric mean of the bin's end points, r^(.5+i)
      y[i]: average of those net_val values for which
            the land_val value is in the (i)th bin
      n[i]: number of points in the (i)th bin

    land_val: sequence of positive numbers (math.log rejects values <= 0)
    net_val: sequence of numbers, read at the same positions as land_val
    r: ratio of the two end points of a bin; must be greater than 1
    x, y, n: dictionaries to fill; callers pass them in empty

    Raises ValueError if r is not greater than 1.
    Returns None.
    """
    # r <= 1 gives a zero or negative log(r): no usable bins
    if not r > 1:
        raise ValueError("bin ratio r must be greater than 1, got %r" % (r,))

    # log(r) does not change inside the loop
    log_r = math.log(r)
    # s[i]: sum of the net values in the (i)th bin
    s = {}

    # loop through the list of land_value - net_value pairs
    for counter, land in enumerate(land_val):
        # round ratio of logarithms down to nearest integer
        i = int(math.floor(math.log(land) / log_r))
        # The quotient of two rounded logarithms can land just below or just
        # above an integer (log(1000)/log(10) is 2.9999999999999996), which
        # would put an exact bin boundary into the neighbouring bin.
        # Check against the real end points and move by one bin if needed.
        if math.pow(r, i + 1) <= land:
            i += 1
        elif math.pow(r, i) > land:
            i -= 1
        # IF we see the current bin index for the first time,
        if i not in n:
            # THEN declare the 0 values for the current bin
            n[i] = 0
            s[i] = 0.0
            # AND declare the center of the bin
            x[i] = math.pow(r, 0.5 + i)
        # in all cases:
        # increment number of values and sum of values for the (i)th bin
        s[i] += net_val[counter]
        n[i] += 1

    # loop through the list of bin indexes
    for i in n:
        # s[i] is a float, so this is a true division
        y[i] = s[i] / n[i]

# -------------------------

def scatter_plot(x10dict,y10dict,n10dict,x16dict,y16dict,n16dict,r,outFileImg):
    """Plot the binned 2010 and 2016 averages on one log-log figure and save it.

    x10dict, y10dict, n10dict: for 2010, the bin center, the average net
        value and the number of points of each bin, keyed by bin index
    x16dict, y16dict, n16dict: the same three dictionaries for 2016
    r: ratio of the two end points of a bin; used here only to leave a gap
       between the data and the frame of the plot
    outFileImg: name of the image file to write

    Each year must contain at least one bin: min() and max() raise
    ValueError on an empty list.
    Returns None.
    """
    # A bin with a single value would get size 3*log(1) = 0 and vanish from
    # the plot, so marker sizes are floored at this value. It is below
    # 3*log(2), so the ordering of sizes is unchanged.
    min_size = 1.0

    def _aligned(xdict, ydict, ndict):
        # Build the three lists from the same bin indexes, in the same order,
        # so each point gets the y value and size of its own bin.
        keys = list(xdict)
        xs = [xdict[k] for k in keys]
        ys = [ydict[k] for k in keys]
        sizes = [max(min_size, 3.0 * math.log(ndict[k])) for k in keys]
        return xs, ys, sizes

    x10, y10, size10 = _aligned(x10dict, y10dict, n10dict)
    x16, y16, size16 = _aligned(x16dict, y16dict, n16dict)

    # gr ("gap ratio"): ratio to keep a gap between min/max values and the frame of the plot
    gr = math.pow(r, 1.5)

    # figure and axes
    fig, ax = plt.subplots()
    # set min/max values
    land_min = min(min(x10), min(x16))
    land_max = max(max(x10), max(x16))
    net_min = min(min(y10), min(y16))
    net_max = max(max(y10), max(y16))
    # set axis limits
    ax.set_xlim(left=land_min / gr, right=land_max * gr)
    ax.set_ylim(bottom=net_min / gr, top=net_max * gr)
    # set axis labels and plot title
    ax.set_xlabel("Land value of a residential property")
    ax.set_ylabel("Net taxable value\nValue of land + improvement value - limited exemptions")
    ax.set_title("Net value vs. Land value\nSymbol area: proportional to logarithm of number of values in that bin", fontsize=12)
    # set log-log axes
    ax.set_xscale('log')
    ax.set_yscale('log')
    # two scatter plots
    ax.scatter(x10, y10, s=size10, alpha=1, label="2010", c='b', edgecolors='none')
    ax.scatter(x16, y16, s=size16, alpha=1, label="2016", c='r', edgecolors='none')
    # draw line: a guide to the eye
    # it spans the range where both axes have data, extended by half the gap
    my_min = max(land_min, net_min) / math.pow(gr, 0.5)
    my_max = min(land_max, net_max) * math.pow(gr, 0.5)
    ax.plot([my_min, my_max], [my_min, my_max], label='Net value = Land value', color='g')
    # set the legend
    ax.legend(loc='upper left')
    # save the figure and close it
    fig.savefig(outFileImg)
    plt.close(fig)

# === Main ===

# 2010 and 2016: collect the land values and net values of residential properties
# (read_values appends to the two lists it is given)
land_val_2010, net_val_2010 = [], []
read_values(inFile_2010, land_val_2010, net_val_2010)
land_val_2016, net_val_2016 = [], []
read_values(inFile_2016, land_val_2016, net_val_2016)

# per year: average of net value over bins of the land value
# (bin_avg fills the three dictionaries it is given; see "def bin_avg")
x10, y10, n10 = {}, {}, {}
bin_avg(land_val_2010, net_val_2010, r, x10, y10, n10)
x16, y16, n16 = {}, {}, {}
bin_avg(land_val_2016, net_val_2016, r, x16, y16, n16)

# draw both years on one figure and write it to outFileImg
scatter_plot(x10, y10, n10, x16, y16, n16, r, outFileImg)

2.  How to use the code

python3 scatter-bins.py Atlantic10.txt Atlantic16.txt 2 scatter-bins.png

3.  Output image (scatter-bins.png)