Lecture 35 – Guest Lecture: Elections and Data

Data 94, Spring 2021

by Lakshya Jain

In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.patches as mpatches
/opt/conda/lib/python3.8/site-packages/geopandas/_compat.py:106: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1 ) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow.
  warnings.warn(
In [2]:
# Let's scrape the data
url_62 = 'https://en.wikipedia.org/wiki/1962_California_gubernatorial_election#Results_by_county'
url_14 = 'https://en.wikipedia.org/wiki/2014_California_gubernatorial_election#Results_by_county'

df_62=pd.read_html(url_62, header=0)[29]
df_14=pd.read_html(url_14, header=0)[35]
In [3]:
df_14["Brown"] = df_14["Brown"]/(df_14["Brown"] + df_14["Kashkari"]) * 100
df_62["Brown"] = df_62["Brown"].map(lambda x: x.strip("%")).astype("double")
df_62["Nixon"] = df_62["Nixon"].map(lambda x: x.strip("%")).astype("double")
df_62["Brown"] = df_62["Brown"]/(df_62["Brown"] + df_62["Nixon"]) * 100
In [5]:
df_14.head()
Out[5]:
County Brown % Kashkari %.1
0 Alameda 82.170554 82.2 63593 17.8
1 Alpine 61.873638 61.9 175 38.1
2 Amador 44.554223 44.6 7071 55.4
3 Butte 47.790963 47.8 32249 52.2
4 Calaveras 43.727325 43.7 8841 56.3
In [6]:
df_62.head()
Out[6]:
County Brown Votes Nixon Votes.1 Wyckoff Votes.2
0 Plumas 67.657841 3397 31.76 1624 1.80% 92
1 Trinity 65.716902 2201 33.69 1148 1.73% 59
2 Solano 65.170247 25987 34.37 13888 1.32% 532
3 Shasta 65.248878 14753 34.07 7858 1.96% 453
4 Lassen 64.010651 3500 35.14 1968 2.36% 132
In [9]:
electoral_results = df_14.merge(df_62, on="County")
In [11]:
electoral_results.head()
Out[11]:
County Brown_x % Kashkari %.1 Brown_y Votes Nixon Votes.1 Wyckoff Votes.2
0 Alameda 82.170554 82.2 63593 17.8 58.648594 206861 40.88 145851 1.13% 4038
1 Alpine 61.873638 61.9 175 38.1 35.453896 67 63.21 122 2.07% 4
2 Amador 44.554223 44.6 7071 55.4 59.153784 2811 40.16 1941 1.68% 81
3 Butte 47.790963 47.8 32249 52.2 48.452248 16142 50.79 17172 1.47% 497
4 Calaveras 43.727325 43.7 8841 56.3 47.258459 2379 51.75 2655 1.87% 96
In [12]:
electoral_results = electoral_results[["County", "Brown_x", "Brown_y"]]
electoral_results = electoral_results.rename(columns={'Brown_x': 'Jerry Brown 2014', 'Brown_y': 'Pat Brown 1962'})
electoral_results["Swing"] = electoral_results['Jerry Brown 2014'] - electoral_results['Pat Brown 1962']
In [13]:
electoral_results.head()
Out[13]:
County Jerry Brown 2014 Pat Brown 1962 Swing
0 Alameda 82.170554 58.648594 23.521960
1 Alpine 61.873638 35.453896 26.419743
2 Amador 44.554223 59.153784 -14.599561
3 Butte 47.790963 48.452248 -0.661285
4 Calaveras 43.727325 47.258459 -3.531134
In [15]:
map_df = gpd.read_file("data/CA_Counties/CA_Counties_TIGER2016.shp")
map_df = map_df.to_crs(epsg=3857)
map_df = map_df.merge(electoral_results[["County", "Swing"]], left_on="NAME", right_on="County", how="inner")
In [17]:
bins = [-30, -20, -15, -10, -5, 0, 5, 10, 15, 20, 30]
map_df['Bucket'] = 0.0
bin_names = ["Pat Brown > +30"]
for i in range(len(bins)):
    name = ""
    if bins[i] < 0:
        name = "Pat Brown"
        boundary = bins[i+1]
    else:
        name = "Jerry Brown"
        boundary = bins[i]
    bin_names += [name + ' > +' + str(abs(boundary))]
    map_df.loc[map_df['Swing'] > boundary, "Bucket"] = i

map_df['coords'] = map_df['geometry'].apply(lambda x: x.representative_point().coords[:] if x else None)
map_df['coords'] = [coords[0] if coords else None for coords in map_df['coords']]
In [20]:
map_df.head()
Out[20]:
STATEFP COUNTYFP COUNTYNS GEOID NAME NAMELSAD LSAD CLASSFP MTFCC CSAFP ... FUNCSTAT ALAND AWATER INTPTLAT INTPTLON geometry County Swing Bucket coords
0 06 091 00277310 06091 Sierra Sierra County 06 H1 G4020 None ... A 2468694587 23299110 +39.5769252 -120.5219926 POLYGON ((-13431319.751 4821511.426, -13431312... Sierra -15.248742 0.0 (-13415186.124224039, 4805652.834124008)
1 06 067 00277298 06067 Sacramento Sacramento County 06 H1 G4020 472 ... A 2499183617 76073827 +38.4500114 -121.3404409 POLYGON ((-13490651.476 4680831.603, -13490511... Sacramento 0.670226 5.0 (-13500352.951060079, 4633045.747024901)
2 06 083 00277306 06083 Santa Barbara Santa Barbara County 06 H1 G4020 None ... A 7084000598 2729814515 +34.5370572 -120.0399729 MULTIPOLYGON (((-13423116.772 4042044.149, -13... Santa Barbara 10.155318 7.0 (-13365390.750103593, 4126106.993331197)
3 06 009 01675885 06009 Calaveras Calaveras County 06 H1 G4020 None ... A 2641820834 43806026 +38.1838996 -120.5614415 POLYGON ((-13428575.483 4627725.228, -13428534... Calaveras -3.531134 3.0 (-13431447.264959544, 4603722.785430745)
4 06 111 00277320 06111 Ventura Ventura County 06 H1 G4020 348 ... A 4773390489 945942791 +34.3587415 -119.1331432 MULTIPOLYGON (((-13317853.594 3931602.414, -13... Ventura -1.142807 3.0 (-13255682.598585451, 4089153.3757506693)

5 rows × 22 columns

In [21]:
f, ax = plt.subplots(1, figsize=(15, 15))
cmap = plt.cm.get_cmap("PiYG", len(bin_names))
ax = map_df.plot(column="Bucket", cmap=cmap,
                   edgecolor="grey", linewidth=0.2, ax=ax)

ax.legend([mpatches.Patch(color=cmap(b)) for b in range(len(bin_names))], bin_names, loc=(1.02, .18))

for idx, row in map_df.iterrows():
    if not row['coords']:
        continue
    plt.annotate(s=row['NAME'], xy=row['coords'], horizontalalignment='center', size=6)

ax.set_axis_off()

plt.gca().set_axis_off()
plt.subplots_adjust(top = 0.95, bottom = 0.05, right = 0.95, left = 0.05, 
            hspace = 0.05, wspace = 0.05)
plt.margins(0,0)
plt.gca().xaxis.set_major_locator(plt.NullLocator())
plt.gca().yaxis.set_major_locator(plt.NullLocator())

plt.title("Pat Brown vs Jerry Brown: 1962-2014")
plt.figtext(0.99, 0.175, 'Data: Wikipedia', horizontalalignment='right')
plt.show()
<ipython-input-21-5f881e4a9e90>:11: MatplotlibDeprecationWarning: The 's' parameter of annotate() has been renamed 'text' since Matplotlib 3.3; support for the old name will be dropped two minor releases later.
  plt.annotate(s=row['NAME'], xy=row['coords'], horizontalalignment='center', size=6)