# Import modules
import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx
import osmnx as ox
import re
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
from tqdm import tqdm
import multiprocessing as mp
import folium
import itertools
import os
import time
import warnings
import IPython
import requests
from IPython.display import display, clear_output

warnings.filterwarnings("ignore")
print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))

numpy==1.22.0
pandas==1.3.5
geopandas==0.10.2
networkx==2.6.3
osmnx==1.1.2
re==2.2.1
folium==0.12.1.post1
IPython==8.3.0
requests==2.27.1


# Check working directory
os.getcwd()

'/home/jovyan/work/RPr-Kang-2020/procedure/code'


# Use to set work directory properly
if os.path.basename(os.getcwd()) == 'code':
    os.chdir('../../')
os.getcwd()

'/home/jovyan/work/RPr-Kang-2020'


# Read in at risk population data
atrisk_data = gpd.read_file('./data/raw/public/PopData/Illinois_Tract.shp')
atrisk_data.head()


# Read in covid case data
covid_data = gpd.read_file('./data/raw/public/PopData/Chicago_ZIPCODE.shp')
covid_data['cases'] = covid_data['cases']
covid_data.head()


# Read in hospital data
hospitals = gpd.read_file('./data/raw/public/HospitalData/Chicago_Hospital_Info.shp')
hospitals.head()


# Plot hospital data
m = folium.Map(location=[41.85, -87.65], tiles='cartodbpositron', zoom_start=10)
for i in range(0, len(hospitals)):
    folium.CircleMarker(
      location=[hospitals.iloc[i]['Y'], hospitals.iloc[i]['X']],
      popup="{}{}\n{}{}\n{}{}".format('Hospital Name: ',hospitals.iloc[i]['Hospital'],
                                      'ICU Beds: ',hospitals.iloc[i]['Adult ICU'],
                                      'Ventilators: ', hospitals.iloc[i]['Total Vent']),
      radius=5,
      color='blue',
      fill=True,
      fill_opacity=0.6,
      legend_name = 'Hospitals'
    ).add_to(m)
legend_html =   '''<div style="position: fixed; width: 20%; heigh: auto;
                            bottom: 10px; left: 10px;
                            solid grey; z-index:9999; font-size:14px;
                            ">&nbsp; Legend<br>'''

m


# Read in and plot grid file for Chicago
grid_file = gpd.read_file('./data/raw/public/GridFile/Chicago_Grid.shp')
grid_file.plot(figsize=(8,8))

<AxesSubplot:>


%%time
# To create a new graph from OpenStreetMap, delete or rename data/raw/private/Chicago_Network_Buffer.graphml 
# (if it exists), and set OSM to True 
OSM = False

# if buffered street network is not saved, and OSM is preferred, # generate a new graph from OpenStreetMap and save it
if not os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml") and OSM:
    print("Loading buffered Chicago road network from OpenStreetMap. Please wait... runtime may exceed 9min...", flush=True)
    G = ox.graph_from_place('Chicago', network_type='drive', buffer_dist=24140.2) 
    print("Saving Chicago road network to raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
    ox.save_graphml(G, './data/raw/private/Chicago_Network_Buffer.graphml')
    print("Data saved.")

# otherwise, if buffered street network is not saved, download graph from the OSF project
elif not os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml"):
    print("Downloading buffered Chicago road network from OSF...", flush=True)
    url = 'https://osf.io/download/z8ery/'
    r = requests.get(url, allow_redirects=True)
    print("Saving buffered Chicago road network to file...", flush=True)
    open('./data/raw/private/Chicago_Network_Buffer.graphml', 'wb').write(r.content)

# if the buffered street network is already saved, load it
if os.path.exists("./data/raw/private/Chicago_Network_Buffer.graphml"):
    print("Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait...", flush=True)
    G = ox.load_graphml('./data/raw/private/Chicago_Network_Buffer.graphml') 
    print("Data loaded.") 
else:
    print("Error: could not load the road network from file.")

Loading buffered Chicago road network from raw/private/Chicago_Network_Buffer.graphml. Please wait...
Data loaded.
CPU times: user 37.8 s, sys: 1.56 s, total: 39.4 s
Wall time: 39.4 s


%%time
ox.plot_graph(G, node_size = 1, bgcolor = 'white', node_color = 'black', edge_color = "#333333", node_alpha = 0.5, edge_linewidth = 0.5)

CPU times: user 59 s, sys: 359 ms, total: 59.4 s
Wall time: 59.2 s

(<Figure size 576x576 with 1 Axes>, <AxesSubplot:>)


%%time
# Turn nodes and edges into geodataframes
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)

# Get unique counts of road segments for each speed limit
print(edges['maxspeed'].value_counts())
print(str(len(edges)) + " edges in graph")

# can we also visualize highways / roads with higher speed limits to check accuracy?
# the code above converts the graph into an edges geodataframe, which could theoretically be filtered
# by fast road segments and mapped, e.g. in folium

25 mph                        4793
30 mph                        3555
35 mph                        3364
40 mph                        2093
45 mph                        1418
20 mph                        1155
55 mph                         614
60 mph                         279
50 mph                         191
40                              79
15 mph                          76
70 mph                          71
65 mph                          54
10 mph                          38
[40 mph, 45 mph]                27
[30 mph, 35 mph]                26
45,30                           24
[40 mph, 35 mph]                22
70                              21
25                              20
[55 mph, 45 mph]                16
25, east                        14
[45 mph, 35 mph]                13
[30 mph, 25 mph]                10
[45 mph, 50 mph]                 8
50                               8
[40 mph, 30 mph]                 7
[35 mph, 25 mph]                 6
[55 mph, 60 mph]                 5
20                               4
[70 mph, 60 mph]                 3
[65 mph, 60 mph]                 3
[40 mph, 45 mph, 35 mph]         3
[70 mph, 65 mph]                 2
[70 mph, 45 mph, 5 mph]          2
[40, 45 mph]                     2
[35 mph, 50 mph]                 2
35                               2
[55 mph, 65 mph]                 2
[40 mph, 50 mph]                 2
[15 mph, 25 mph]                 2
[40 mph, 35 mph, 25 mph]         2
[15 mph, 40 mph, 30 mph]         2
[20 mph, 25 mph]                 2
[30 mph, 25, east]               2
[65 mph, 55 mph]                 2
[20 mph, 35 mph]                 2
[55 mph, 55]                     2
55                               2
[15 mph, 30 mph]                 2
[45 mph, 30 mph]                 2
[15 mph, 45 mph]                 2
[55 mph, 45, east, 50 mph]       2
[20 mph, 30 mph]                 1
[5 mph, 45 mph, 35 mph]          1
[55 mph, 35 mph]                 1
[5 mph, 35 mph]                  1
[55 mph, 50 mph]                 1
Name: maxspeed, dtype: int64
384240 edges in graph
CPU times: user 36.6 s, sys: 70.5 ms, total: 36.7 s
Wall time: 36.7 s


edges.head()


# two things about this function:
# 1) the work to remove nodes is hardly worth it now that OSMnx cleans graphs by default
# the function is now only pruning < 300 nodes
# 2) try using the OSMnx speed module for setting speeds, travel times
# https://osmnx.readthedocs.io/en/stable/user-reference.html#module-osmnx.speed
# just be careful about units of speed and time!
# the remainder of this code expects 'time' to be measured in minutes

def network_setting(network):
    _nodes_removed = len([n for (n, deg) in network.out_degree() if deg ==0])
    network.remove_nodes_from([n for (n, deg) in network.out_degree() if deg ==0])
    for component in list(nx.strongly_connected_components(network)):
        if len(component)<10:
            for node in component:
                _nodes_removed+=1
                network.remove_node(node)
    for u, v, k, data in tqdm(G.edges(data=True, keys=True),position=0):
        if 'maxspeed' in data.keys():
            speed_type = type(data['maxspeed'])
            if (speed_type==str):
                # Add in try/except blocks to catch maxspeed formats that don't fit Kang et al's cases
                try:
                    if len(data['maxspeed'].split(','))==2:
                        data['maxspeed_fix']=float(data['maxspeed'].split(',')[0])                  
                    elif data['maxspeed']=='signals':
                        data['maxspeed_fix']=30.0 # drive speed setting as 35 miles
                    else:
                        data['maxspeed_fix']=float(data['maxspeed'].split()[0])
                except:
                    data['maxspeed_fix']=30.0 #miles
            else:
                try:
                    data['maxspeed_fix']=float(data['maxspeed'][0].split()[0])
                except:
                    data['maxspeed_fix']=30.0 #miles
        else:
            data['maxspeed_fix']=30.0 #miles
        data['maxspeed_meters'] = data['maxspeed_fix']*26.8223 # convert mile per hour to meters per minute
        data['time'] = float(data['length'])/ data['maxspeed_meters'] # meters / meters per minute = minutes
    print("Removed {} nodes ({:2.4f}%) from the OSMNX network".format(_nodes_removed, _nodes_removed/float(network.number_of_nodes())))
    print("Number of nodes: {}".format(network.number_of_nodes()))
    print("Number of edges: {}".format(network.number_of_edges()))    
    return(network)


%%time
# G, hospitals, grid_file, pop_data = file_import (population_dropdown.value)
G = network_setting(G)
# Create point geometries for each node in the graph, to make constructing catchment area polygons easier
for node, data in G.nodes(data=True):
    data['geometry']=Point(data['x'], data['y'])
# Modify code to react to processor dropdown (got rid of file_import function)

100%|██████████| 383911/383911 [00:01<00:00, 332188.68it/s]

Removed 274 nodes (0.0019%) from the OSMNX network
Number of nodes: 142044
Number of edges: 383911
CPU times: user 4.55 s, sys: 435 ms, total: 4.98 s
Wall time: 4.97 s


%%time
## Get unique counts for each road network
# Turn nodes and edges in geodataframes
nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)

# Count
print(edges['maxspeed_fix'].value_counts())
print(str(len(edges)) + " edges in graph")

30.0    369474
25.0      4827
35.0      3374
40.0      2237
45.0      1465
20.0      1161
55.0       637
60.0       277
50.0       199
70.0        89
15.0        84
65.0        47
10.0        38
5.0          2
Name: maxspeed_fix, dtype: int64
383911 edges in graph
CPU times: user 37.6 s, sys: 168 ms, total: 37.8 s
Wall time: 37.7 s


def hospital_setting(hospitals, G):
    # Create an empty column 
    hospitals['nearest_osm']=None
    # Append the neaerest osm column with each hospitals neaerest osm node
    for i in tqdm(hospitals.index, desc="Find the nearest network node from hospitals", position=0):
        hospitals['nearest_osm'][i] = ox.get_nearest_node(G, [hospitals['Y'][i], hospitals['X'][i]], method='euclidean') # find the nearest node from hospital location
    print ('hospital setting is done')
    return(hospitals)


def pop_centroid (pop_data, pop_type):
    pop_data = pop_data.to_crs({'init': 'epsg:4326'})
    # If pop is selected in dropdown, select at risk pop where population is greater than 0
    if pop_type =="pop":
        pop_data=pop_data[pop_data['OverFifty']>=0]
    # If covid is selected in dropdown, select where covid cases are greater than 0
    if pop_type =="covid":
        pop_data=pop_data[pop_data['cases']>=0]
    pop_cent = pop_data.centroid # it make the polygon to the point without any other information
    # Convert to gdf
    pop_centroid = gpd.GeoDataFrame()
    i = 0
    for point in tqdm(pop_cent, desc='Pop Centroid File Setting', position=0):
        if pop_type== "pop":
            pop = pop_data.iloc[i]['OverFifty']
            code = pop_data.iloc[i]['GEOID']
        if pop_type =="covid":
            pop = pop_data.iloc[i]['cases']
            code = pop_data.iloc[i].ZCTA5CE10
        pop_centroid = pop_centroid.append({'code':code,'pop': pop,'geometry': point}, ignore_index=True)
        i = i+1
    return(pop_centroid)


def dijkstra_cca_polygons(G, nearest_osm, distances, distance_unit = "time"):
    
    '''
    
    Before running: must assign point geometries to street nodes
    
    # create point geometries for the entire graph
    for node, data in G.nodes(data=True):
    data['geometry']=Point(data['x'], data['y'])
    
    '''
    
    ## CREATE DICTIONARIES
    # create dictionary of nearest nodes
    nearest_nodes_30 = nx.single_source_dijkstra_path_length(G, nearest_osm, distances[2], distance_unit) # creating the largest graph from which 10 and 20 minute drive times can be extracted from
    
    # extract values within 20 and 10 (respectively) minutes drive times
    nearest_nodes_20 = dict()
    nearest_nodes_10 = dict()
    for key, value in nearest_nodes_30.items():
        if value <= 20:
            nearest_nodes_20[key] = value
        if value <= 10:
            nearest_nodes_10[key] = value
    
    ## CREATE POLYGONS FOR 3 DISTANCE CATEGORIES (10 min, 20 min, 30 min)
    # 30 MIN
    # If the graph already has a geometry attribute with point data,
    # this line will create a GeoPandas GeoDataFrame from the nearest_nodes_30 dictionary
    points_30 = gpd.GeoDataFrame(gpd.GeoSeries(nx.get_node_attributes(G.subgraph(nearest_nodes_30), 'geometry')))

    # This line converts the nearest_nodes_30 dictionary into a Pandas data frame and joins it to points
    # left_index=True and right_index=True are options for merge() to join on the index values
    points_30 = points_30.merge(pd.Series(nearest_nodes_30).to_frame(), left_index=True, right_index=True)

    # Re-name the columns and set the geodataframe geometry to the geometry column
    points_30 = points_30.rename(columns={'0_x':'geometry','0_y':'z'}).set_geometry('geometry')

    # Create a convex hull polygon from the points
    polygon_30 = gpd.GeoDataFrame(gpd.GeoSeries(points_30.unary_union.convex_hull))
    polygon_30 = polygon_30.rename(columns={0:'geometry'}).set_geometry('geometry')
    
    # 20 MIN
    # Select nodes less than or equal to 20
    points_20 = points_30.query("z <= 20")
    
    # Create a convex hull polygon from the points
    polygon_20 = gpd.GeoDataFrame(gpd.GeoSeries(points_20.unary_union.convex_hull))
    polygon_20 = polygon_20.rename(columns={0:'geometry'}).set_geometry('geometry')
    
    # 10 MIN
    # Select nodes less than or equal to 10
    points_10 = points_30.query("z <= 10")
    
    # Create a convex hull polygon from the points
    polygon_10 = gpd.GeoDataFrame(gpd.GeoSeries(points_10.unary_union.convex_hull))
    polygon_10 = polygon_10.rename(columns={0:'geometry'}).set_geometry('geometry')
    
    # Create empty list and append polygons
    polygons = []
    
    # Append
    polygons.append(polygon_10)
    polygons.append(polygon_20)
    polygons.append(polygon_30)
    
    # Clip the overlapping distance ploygons (create two donuts + hole)
    for i in reversed(range(1, len(distances))):
        polygons[i] = gpd.overlay(polygons[i], polygons[i-1], how="difference")

    return polygons


def hospital_measure_acc (_thread_id, hospital, pop_data, distances, weights):
    # Create polygons
    polygons = dijkstra_cca_polygons(G, hospital['nearest_osm'], distances)
    
    # Calculate accessibility measurements
    num_pops = []
    for j in pop_data.index:
        point = pop_data['geometry'][j]
        # Multiply polygons by weights
        for k in range(len(polygons)):
            if len(polygons[k]) > 0: # To exclude the weirdo (convex hull is not polygon)
                if (point.within(polygons[k].iloc[0]["geometry"])):
                    num_pops.append(pop_data['pop'][j]*weights[k])  
    total_pop = sum(num_pops)
    for i in range(len(distances)):
        polygons[i]['time']=distances[i]
        polygons[i]['total_pop']=total_pop
        polygons[i]['hospital_icu_beds'] = float(hospital['Adult ICU'])/polygons[i]['total_pop'] # proportion of # of beds over pops in 10 mins
        polygons[i]['hospital_vents'] = float(hospital['Total Vent'])/polygons[i]['total_pop'] # proportion of # of beds over pops in 10 mins
        polygons[i].crs = { 'init' : 'epsg:4326'}
        polygons[i] = polygons[i].to_crs({'init':'epsg:32616'})
    print('{:.0f}'.format(_thread_id), end=" ", flush=True)
    return(_thread_id, [ polygon.copy(deep=True) for polygon in polygons ])


def hospital_acc_unpacker(args):
    return hospital_measure_acc(*args)

# WHERE THE RESULTS ARE POOLED AND THEN REAGGREGATED
def measure_acc_par (hospitals, pop_data, network, distances, weights, num_proc = 4):
    catchments = []
    for distance in distances:
        catchments.append(gpd.GeoDataFrame())
    pool = mp.Pool(processes = num_proc)
    hospital_list = [ hospitals.iloc[i] for i in range(len(hospitals)) ]
    print("Calculating", len(hospital_list), "hospital catchments...\ncompleted number:", end=" ")
    results = pool.map(hospital_acc_unpacker, zip(range(len(hospital_list)), hospital_list, itertools.repeat(pop_data), itertools.repeat(distances), itertools.repeat(weights)))
    pool.close()
    results.sort()
    results = [ r[1] for r in results ]
    for i in range(len(results)):
        for j in range(len(distances)):
            catchments[j] = catchments[j].append(results[i][j], sort=False)
    return catchments


from collections import Counter
def overlap_calc(_id, poly, grid_file, weight, service_type):
    value_dict = Counter()
    if type(poly.iloc[0][service_type])!=type(None):           
        value = float(poly[service_type])*weight
        intersect = gpd.overlay(grid_file, poly, how='intersection')
        intersect['overlapped']= intersect.area
        intersect['percent'] = intersect['overlapped']/intersect['area']
        intersect=intersect[intersect['percent']>=0.5]
        intersect_region = intersect['id']
        for intersect_id in intersect_region:
            try:
                value_dict[intersect_id] +=value
            except:
                value_dict[intersect_id] = value
    return(_id, value_dict)

def overlap_calc_unpacker(args):
    return overlap_calc(*args)


def overlapping_function (grid_file, catchments, service_type, weights, num_proc = 4):
    grid_file[service_type]=0
    pool = mp.Pool(processes = num_proc)
    acc_list = []
    for i in range(len(catchments)):
        acc_list.extend([ catchments[i][j:j+1] for j in range(len(catchments[i])) ])
    acc_weights = []
    for i in range(len(catchments)):
        acc_weights.extend( [weights[i]]*len(catchments[i]) )
    results = pool.map(overlap_calc_unpacker, zip(range(len(acc_list)), acc_list, itertools.repeat(grid_file), acc_weights, itertools.repeat(service_type)))
    pool.close()
    results.sort()
    results = [ r[1] for r in results ]
    service_values = results[0]
    for result in results[1:]:
        service_values+=result
    for intersect_id, value in service_values.items():
        grid_file.loc[grid_file['id']==intersect_id, service_type] += value
    return(grid_file)


def normalization (result, res):
    result[res]=(result[res]-min(result[res]))/(max(result[res])-min(result[res]))
    return result


def output_map(output_grid, base_map, hospitals, resource):
    ax=output_grid.plot(column=resource, cmap='PuBuGn',figsize=(18,12), legend=True, zorder=1)
    # Next two lines set bounds for our x- and y-axes because it looks like there's a weird 
    # Point at the bottom left of the map that's messing up our frame (Maja)
    ax.set_xlim([314000, 370000])
    ax.set_ylim([540000, 616000])
    base_map.plot(ax=ax, facecolor="none", edgecolor='gray', lw=0.1)
    hospitals.plot(ax=ax, markersize=10, zorder=1, c='blue')


import ipywidgets
from IPython.display import display

processor_dropdown = ipywidgets.Dropdown( options=[("1", 1), ("2", 2), ("3", 3), ("4", 4)],
    value = 4, description = "Processor: ")

population_dropdown = ipywidgets.Dropdown( options=[("Population at Risk", "pop"), ("COVID-19 Patients", "covid") ],
    value = "pop", description = "Population: ")

resource_dropdown = ipywidgets.Dropdown( options=[("ICU Beds", "hospital_icu_beds"), ("Ventilators", "hospital_vents") ],
    value = "hospital_icu_beds", description = "Resource: ")

hospital_dropdown =  ipywidgets.Dropdown( options=[("All hospitals", "hospitals"), ("Subset", "hospital_subset") ],
    value = "hospitals", description = "Hospital:")

display(processor_dropdown,population_dropdown,resource_dropdown,hospital_dropdown)

Dropdown(description='Processor: ', index=3, options=(('1', 1), ('2', 2), ('3', 3), ('4', 4)), value=4)

Dropdown(description='Population: ', options=(('Population at Risk', 'pop'), ('COVID-19 Patients', 'covid')), …

Dropdown(description='Resource: ', options=(('ICU Beds', 'hospital_icu_beds'), ('Ventilators', 'hospital_vents…

Dropdown(description='Hospital:', options=(('All hospitals', 'hospitals'), ('Subset', 'hospital_subset')), val…


if population_dropdown.value == "pop":
    pop_data = pop_centroid(atrisk_data, population_dropdown.value)
elif population_dropdown.value == "covid":
    pop_data = pop_centroid(covid_data, population_dropdown.value)
distances=[10,20,30] # Distances in travel time
weights=[1.0, 0.68, 0.22] # Weights where weights[0] is applied to distances[0]
# Other weighting options representing different distance decays
# weights1, weights2, weights3 = [1.0, 0.42, 0.09], [1.0, 0.75, 0.5], [1.0, 0.5, 0.1]
# it is surprising how long this function takes just to calculate centroids.
# why not do it with the geopandas/pandas functions rather than iterating through every item?

Pop Centroid File Setting: 100%|██████████| 3121/3121 [03:49<00:00, 13.60it/s]


# Set hospitals according to hospital dropdown
if hospital_dropdown.value == "hospital_subset":
    hospitals = hospital_setting(hospitals[:1], G)
else: 
    hospitals = hospital_setting(hospitals, G)
resources = ["hospital_icu_beds", "hospital_vents"] # resources
# this is also slower than it needs to be; if network nodes and hospitals are both
# geopandas data frames, it should be possible to do a much faster spatial join rather than iterating through every hospital

Find the nearest network node from hospitals: 100%|██████████| 66/66 [01:25<00:00,  1.30s/it]

hospital setting is done


# # Create point geometries for entire graph
# # what is the pupose of the following two lines? Can this be deleted?
# # for node, data in G.nodes(data=True):
# #     data['geometry']=Point(data['x'], data['y'])

# # which hospital to visualize? 
# fighosp = 4

# # Create catchment for hospital 0
# poly = dijkstra_cca_polygons(G, hospitals['nearest_osm'][fighosp], distances)

# # Reproject polygons
# for i in range(len(poly)):
#     poly[i].crs = { 'init' : 'epsg:4326'}
#     poly[i] = poly[i].to_crs({'init':'epsg:32616'})

# # Reproject hospitals 
# # Possible to map from the hospitals data rather than creating hospital_subset?
# hospital_subset = hospitals.iloc[[fighosp]].to_crs(epsg=32616)

# fig, ax = plt.subplots(figsize=(12,8))

# min_10 = poly[0].plot(ax=ax, color="royalblue", label="10 min drive")
# min_20 = poly[1].plot(ax=ax, color="cornflowerblue", label="20 min drive")
# min_30 = poly[2].plot(ax=ax, color="lightsteelblue", label="30 min drive")

# hospital_subset.plot(ax=ax, color="red", legend=True, label = "hospital")

# # Add legend
# ax.legend()

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File /cvmfs/cybergis.illinois.edu/software/conda/cybergisx/python3-0.9.0/lib/python3.8/site-packages/pandas/core/indexes/range.py:385, in RangeIndex.get_loc(self, key, method, tolerance)
    384 try:
--> 385     return self._range.index(new_key)
    386 except ValueError as err:

ValueError: 4 is not in range

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Input In [42], in <cell line: 10>()
      7 fighosp = 4
      9 # Create catchment for hospital 0
---> 10 poly = dijkstra_cca_polygons(G, hospitals['nearest_osm'][fighosp], distances)
     12 # Reproject polygons
     13 for i in range(len(poly)):

File /cvmfs/cybergis.illinois.edu/software/conda/cybergisx/python3-0.9.0/lib/python3.8/site-packages/pandas/core/series.py:942, in Series.__getitem__(self, key)
    939     return self._values[key]
    941 elif key_is_scalar:
--> 942     return self._get_value(key)
    944 if is_hashable(key):
    945     # Otherwise index.get_value will raise InvalidIndexError
    946     try:
    947         # For labels that don't resolve as scalars like tuples and frozensets

File /cvmfs/cybergis.illinois.edu/software/conda/cybergisx/python3-0.9.0/lib/python3.8/site-packages/pandas/core/series.py:1051, in Series._get_value(self, label, takeable)
   1048     return self._values[label]
   1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
   1052 return self.index._get_values_for_loc(self, loc, label)

File /cvmfs/cybergis.illinois.edu/software/conda/cybergisx/python3-0.9.0/lib/python3.8/site-packages/pandas/core/indexes/range.py:387, in RangeIndex.get_loc(self, key, method, tolerance)
    385             return self._range.index(new_key)
    386         except ValueError as err:
--> 387             raise KeyError(key) from err
    388     raise KeyError(key)
    389 return super().get_loc(key, method=method, tolerance=tolerance)

KeyError: 4


poly

[                                            geometry
 0  POLYGON ((443394.989 4617198.021, 443294.926 4...,
                                             geometry
 0  POLYGON ((440994.972 4609778.186, 440177.105 4...,
                                             geometry
 0  POLYGON ((442100.513 4601490.789, 438695.945 4...]


%%time
catchments = measure_acc_par(hospitals, pop_data, G, distances, weights, num_proc=processor_dropdown.value)

Calculating 66 hospital catchments...
completed number: 150 5  10 6 1 16 11 2 7 17 12 3 8 18 13 4 9 19 14 20 25 30 35 21 26 36 31 22 27 37 32 23 28 38 33 24 29 39 34 40 45 50 55 41 46 51 56 42 47 52 57 43 48 58 53 44 49 59 54 60 65 61 62 63 64 CPU times: user 2.38 s, sys: 502 ms, total: 2.88 s
Wall time: 2min 3s


# add weight field to each catchment polygon
for i in range(len(weights)):
    catchments[i]['weight'] = weights[i]
# combine the three sets of catchment polygons into one geodataframe
geocatchments = pd.concat([catchments[0], catchments[1], catchments[2]])
geocatchments


%%time
# set weighted to False for original 50% threshold method
# switch to True for area-weighted overlay
weighted = True

# if the value to be calculated is already in the hegaxon grid, delete it
# otherwise, the field name gets a suffix _1 in the overlay step
if resource_dropdown.value in list(grid_file.columns.values):
    grid_file = grid_file.drop(resource_dropdown.value, axis = 1)
    
# calculate hexagon 'target' areas
grid_file['area'] = grid_file.area
    
# Intersection overlay of hospital catchments and hexagon grid
print("Intersecting hospital catchments with hexagon grid...")
fragments = gpd.overlay(grid_file, geocatchments, how='intersection')

# Calculate percent coverage of the hexagon by the hospital catchment as
# fragment area / target(hexagon) area
fragments['percent'] = fragments.area / fragments['area']

# if using weighted aggregation... 
if weighted:
    print("Calculating area-weighted value...")
    # multiply the service/population ratio by the distance weight and the percent coverage
    fragments['value'] = fragments[resource_dropdown.value] * fragments['weight'] * fragments['percent']

# if using the 50% coverage rule for unweighted aggregation...
else:
    print("Calculating value for hexagons with >=50% overlap...")
    # filter for only the fragments with > 50% coverage by hospital catchment
    fragments = fragments[fragments['percent']>=0.5]
    # multiply the service/population ration by the distance weight
    fragments['value'] = fragments[resource_dropdown.value] * fragments['weight']

# select just the hexagon id and value from the fragments,
# group the fragments by the (hexagon) id,
# and sum the values
print("Summarizing results by hexagon id...")
sum_results = fragments[['id', 'value']].groupby(by = ['id']).sum()

# join the results to the hexagon grid_file based on hexagon id
print("Joining results to hexagons...")
result_new = pd.merge(grid_file, sum_results, how="left", on = "id")

# rename value column name to the resource name
result_new.rename(columns = {'value' : resource_dropdown.value})

Intersecting hospital catchments with hexagon grid...
Calculating area-weighted value...
Summarizing results by hexagon id...
Joining results to hexagons...
CPU times: user 11.7 s, sys: 108 ms, total: 11.8 s
Wall time: 11.8 s


%%time
for j in range(len(catchments)):
    catchments[j] = catchments[j][catchments[j][resource_dropdown.value]!=float('inf')]
result=overlapping_function(grid_file, catchments, resource_dropdown.value, weights, num_proc=processor_dropdown.value)

CPU times: user 6.45 s, sys: 446 ms, total: 6.89 s
Wall time: 17.9 s


%%time
result = normalization (result, resource_dropdown.value)

CPU times: user 1.34 ms, sys: 1.02 ms, total: 2.35 ms
Wall time: 2.14 ms


result.head()


%%time
hospitals = hospitals.to_crs({'init': 'epsg:26971'})
result = result.to_crs({'init': 'epsg:26971'})
output_map(result, pop_data, hospitals, resource_dropdown.value)

#save 
plt.savefig('./results/figures/reproduced_reproduction/{}_{}__continuous.png'.format(population_dropdown.value, resource_dropdown.value))

CPU times: user 2.35 s, sys: 409 ms, total: 2.76 s
Wall time: 2.16 s


# Import study results to compare
# hospital_i assumed to be for ICU and hospital_v assumed to be for ventilator
# however it's unknown whether the population is the COVID-19 population or the AT RISK population
fp = 'data/derived/public/Chicago_ACC.shp'
og_result = gpd.read_file(fp)
og_result.set_index("id")
og_result.head()


result.set_index("id")
result_compare = result.join(og_result[["hospital_i","hospital_v"]])
result_compare.head()


from scipy import stats


# set original_resource variable to the name of original results column matching the modeling choice
if resource_dropdown.value == "icu_beds":
    original_resource = "hospital_i"
else:
    original_resource = "hospital_v"

# calculate spearman's rho
rho = stats.spearmanr(result_compare[[original_resource, resource_dropdown.value]])

# format text output
correlationmsg = "Comparing: \n" + "Original resource: " + resource_dropdown.value + " and population: unknown\n" + "Reproduction resource: " + resource_dropdown.value + " and population: " + population_dropdown.value
# correlationmsg += "\nStreet network buffered? "
print(correlationmsg)
rho_result = "Rho = " + str(round(rho.correlation,3)) + ", pvalue = " + str(round(rho.pvalue,3))
print(rho_result)

Comparing: 
Original resource: hospital_icu_beds and population: unknown
Reproduction resource: hospital_icu_beds and population: pop
Rho = 0.842, pvalue = 0.0


plt.scatter(result_compare[[original_resource]], result_compare[[resource_dropdown.value]], s=1)
plt.xlabel("Original", labelpad=5)
plt.ylabel("Alternate Reproduction", labelpad=5)
plt.text(.45, .08, rho_result, fontsize=8)
# save plot as image with file name including icu/ventilators and buffered or not
plt.savefig("./results/figures/reproduced_reproduction/compare_{}_.png".format(resource_dropdown.value))

	GEOID	STATEFP	COUNTYFP	TRACTCE	NAMELSAD	Pop	Unnamed_ 0	NAME	OverFifty	TotalPop	geometry
0	17091011700	17	091	011700	Census Tract 117	3688	588	Census Tract 117, Kankakee County, Illinois	1135	3688	POLYGON ((-87.88768 41.13594, -87.88764 41.136...
1	17091011800	17	091	011800	Census Tract 118	2623	220	Census Tract 118, Kankakee County, Illinois	950	2623	POLYGON ((-87.89410 41.14388, -87.89400 41.143...
2	17119400951	17	119	400951	Census Tract 4009.51	5005	2285	Census Tract 4009.51, Madison County, Illinois	2481	5005	POLYGON ((-90.11192 38.70281, -90.11128 38.703...
3	17119400952	17	119	400952	Census Tract 4009.52	3014	2299	Census Tract 4009.52, Madison County, Illinois	1221	3014	POLYGON ((-90.09442 38.72031, -90.09360 38.720...
4	17135957500	17	135	957500	Census Tract 9575	2869	1026	Census Tract 9575, Montgomery County, Illinois	1171	2869	POLYGON ((-89.70369 39.34803, -89.69928 39.348...

	ZCTA5CE10	County	State	Join	ZONE	ZONENAME	FIPS	pop	cases	geometry
0	60660	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	43242	78	POLYGON ((-87.65049 41.99735, -87.65029 41.996...
1	60640	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	69715	117	POLYGON ((-87.64645 41.97965, -87.64565 41.978...
2	60614	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	71308	134	MULTIPOLYGON (((-87.67703 41.91845, -87.67705 ...
3	60712	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	12539	42	MULTIPOLYGON (((-87.76181 42.00465, -87.76156 ...
4	60076	Cook County	IL	Cook County IL	IL_E	Illinois East	1201	31867	114	MULTIPOLYGON (((-87.74782 42.01540, -87.74526 ...

	FID	Hospital	City	ZIP_Code	X	Y	Total_Bed	Adult ICU	Total Vent	geometry
0	2	Methodist Hospital of Chicago	Chicago	60640	-87.671079	41.972800	145	36	12	MULTIPOINT (-87.67108 41.97280)
1	4	Advocate Christ Medical Center	Oak Lawn	60453	-87.732483	41.720281	785	196	64	MULTIPOINT (-87.73248 41.72028)
2	13	Evanston Hospital	Evanston	60201	-87.683288	42.065393	354	89	29	MULTIPOINT (-87.68329 42.06539)
3	24	AMITA Health Adventist Medical Center Hinsdale	Hinsdale	60521	-87.920116	41.805613	261	65	21	MULTIPOINT (-87.92012 41.80561)
4	25	Holy Cross Hospital	Chicago	60629	-87.690841	41.770001	264	66	21	MULTIPOINT (-87.69084 41.77000)

			osmid	highway	oneway	length	name	geometry	lanes	ref	bridge	maxspeed	access	service	tunnel	junction	width	area
u	v	key
261095436	261095437	0	24067717	residential	False	46.873	NaN	LINESTRING (-87.90237 42.10571, -87.90198 42.1...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
261095437	261095439	0	24067717	residential	False	46.317	NaN	LINESTRING (-87.90198 42.10540, -87.90159 42.1...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	261095436	0	24067717	residential	False	46.873	NaN	LINESTRING (-87.90198 42.10540, -87.90237 42.1...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	261109275	0	24069424	residential	False	34.892	NaN	LINESTRING (-87.90198 42.10540, -87.90227 42.1...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	261109274	0	24069424	residential	False	47.866	NaN	LINESTRING (-87.90198 42.10540, -87.90156 42.1...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	geometry	time	total_pop	hospital_icu_beds	hospital_vents	weight
0	POLYGON ((445179.143 4638984.728, 444631.933 4...	10	649746.78	0.000055	0.000018	1.00
0	POLYGON ((438522.380 4611688.522, 432500.229 4...	10	518276.42	0.000378	0.000123	1.00
0	POLYGON ((444410.308 4649987.662, 443665.508 4...	10	353035.72	0.000252	0.000082	1.00
0	POLYGON ((423893.472 4621571.092, 421747.249 4...	10	530716.34	0.000122	0.000040	1.00
0	POLYGON ((443394.989 4617198.021, 443294.926 4...	10	549015.52	0.000120	0.000038	1.00
...	...	...	...	...	...	...
0	POLYGON ((413304.564 4624675.782, 411438.057 4...	30	883626.18	0.000031	0.000010	0.22
0	POLYGON ((423832.722 4623167.269, 412753.869 4...	30	631863.06	0.000071	0.000022	0.22
0	POLYGON ((450149.221 4637875.751, 451863.333 4...	30	886772.14	0.000095	0.000030	0.22
0	POLYGON ((412193.230 4628128.056, 411285.093 4...	30	800327.78	0.000076	0.000025	0.22
0	POLYGON ((415394.093 4611768.127, 412329.990 4...	30	633362.62	0.000148	0.000047	0.22

	left	top	right	bottom	id	area	geometry	hospital_icu_beds
0	440843.416087	4.638515e+06	441420.766356	4.638015e+06	4158	216506.350946	POLYGON ((440843.416 4638265.403, 440987.754 4...	0.003813
1	440843.416087	4.638015e+06	441420.766356	4.637515e+06	4159	216506.350946	POLYGON ((440843.416 4637765.403, 440987.754 4...	0.003812
2	440843.416087	4.639515e+06	441420.766356	4.639015e+06	4156	216506.350946	POLYGON ((440843.416 4639265.403, 440987.754 4...	0.003922
3	440843.416087	4.639015e+06	441420.766356	4.638515e+06	4157	216506.350946	POLYGON ((440843.416 4638765.403, 440987.754 4...	0.003854
4	440843.416087	4.640515e+06	441420.766356	4.640015e+06	4154	216506.350946	POLYGON ((440843.416 4640265.403, 440987.754 4...	0.003973
...	...	...	...	...	...	...	...	...
3274	440843.416087	4.643015e+06	441420.766356	4.642515e+06	4149	216506.350946	POLYGON ((440843.416 4642765.403, 440987.754 4...	0.003980
3275	440843.416087	4.644515e+06	441420.766356	4.644015e+06	4146	216506.350946	POLYGON ((440843.416 4644265.403, 440987.754 4...	0.003907
3276	440843.416087	4.644015e+06	441420.766356	4.643515e+06	4147	216506.350946	POLYGON ((440843.416 4643765.403, 440987.754 4...	0.003933
3277	440843.416087	4.645515e+06	441420.766356	4.645015e+06	4144	216506.350946	POLYGON ((440843.416 4645265.403, 440987.754 4...	0.003488
3278	440843.416087	4.645015e+06	441420.766356	4.644515e+06	4145	216506.350946	POLYGON ((440843.416 4644765.403, 440987.754 4...	0.003747

	id	hospital_i	hospital_v	geometry
0	4158	0.844249	0.843439	POLYGON ((-87.71312 41.89411, -87.71140 41.896...
1	4159	0.843600	0.843031	POLYGON ((-87.71307 41.88961, -87.71135 41.891...
2	4156	0.906094	0.904699	POLYGON ((-87.71322 41.90312, -87.71150 41.905...
3	4157	0.877197	0.876503	POLYGON ((-87.71317 41.89861, -87.71145 41.900...
4	4154	0.911424	0.910002	POLYGON ((-87.71332 41.91212, -87.71160 41.914...

Title: Reproduction of Spatial Accessibility of COVID-19 Healthcare Resources in Illinois¶

Original Data¶

Original Study Design¶

Alternate Study Design¶

Modules¶

Check Directories¶

Load and Visualize Data¶

Population and COVID-19 Cases Data by County¶

Load Hospital Data¶

Generate and Plot Map of Hospitals¶

Load and Plot Hexagon Grids (500-meter resolution)¶

Load the Road Network¶

Plot the Road Network¶

Check speed limit values¶

network_setting function¶

Preprocess the Network using network_setting¶

Re-check speed limit values¶

"Helper" Functions¶

hospital_setting¶

pop_centroid¶

djikstra_cca_polygons¶

hospital_measure_acc (adjusted to incorporate dijkstra_cca_polygons)¶

measure_acc_par¶

overlap_calc¶

overlapping_function¶

normalization¶

file_import¶

Run the model¶

Process population data¶

Process hospital data¶

Visualize catchment areas for first hospital¶

Calculate hospital catchment areas¶

Calculate accessibility¶

Accessibility Maps¶

Compare Reproduction with Original Results¶

Join original results to current results¶

Calculate Spearman's Rank Correlation¶

Create Scatterplot¶

Results & Discussion¶

Conclusion¶

References¶