Today we will make our first geospatial map from the article Coronavirus in the U.S.: Latest Map and Case Count which looks like the folowing - case counts in US

import geopandas as gpd
import altair as alt
import pandas as pd
alt.renderers.set_embed_options(actions=False)

# Shapefiles from us census
state_shpfile = './shapes/cb_2019_us_state_20m'
county_shpfile = './shapes/cb_2019_us_county_20m'
states = gpd.read_file(state_shpfile)
county = gpd.read_file(county_shpfile)

# Adding longitude and latitude in state data
states['lon'] = states['geometry'].centroid.x
states['lat'] = states['geometry'].centroid.y

# Adding longitude and latitude in county data
county['lon'] = county['geometry'].centroid.x
county['lat'] = county['geometry'].centroid.y
# NYT dataset
county_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
cdf = pd.read_csv(county_url)
cdf[cdf['fips'].isnull() == True].groupby(['county']).sum()
fips cases deaths
county
Joplin 0.0 329 4
Kansas City 0.0 85094 1700
New York City 0.0 15615980 1528538
Unknown 0.0 885104 41064
#hide_output
cdf[cdf['fips'].isnull() == True].groupby(['county', 'state']).sum()

NYT publishes the data for New York City in a different way by combining the results of the 5 boroughs that comprise it. So we will combine them too and add a new row in the dataset with a custom fips of 1. Let's start by making this change in the raw NYT dataset itself.

cdf.loc[cdf['county'] == 'New York City','fips'] = 1
cdf[cdf['county'] == 'New York City']
date county state fips cases deaths
416 2020-03-01 New York City New York 1.0 1 0
448 2020-03-02 New York City New York 1.0 1 0
482 2020-03-03 New York City New York 1.0 2 0
518 2020-03-04 New York City New York 1.0 2 0
565 2020-03-05 New York City New York 1.0 4 0
... ... ... ... ... ... ...
262876 2020-06-23 New York City New York 1.0 217803 21817
265930 2020-06-24 New York City New York 1.0 218089 21838
268988 2020-06-25 New York City New York 1.0 218429 21856
272054 2020-06-26 New York City New York 1.0 218799 21893
275123 2020-06-27 New York City New York 1.0 219157 21913

119 rows × 6 columns

# collapse
latest_cases = cdf.groupby('fips', as_index=False).agg({'county': 'last', 'date': 'last', 'state': 'last', 'cases': 'last', 'deaths': 'last'})
latest_cases

fips county date state cases deaths
0 1.0 New York City 2020-06-27 New York 219157 21913
1 1001.0 Autauga 2020-06-27 Alabama 498 12
2 1003.0 Baldwin 2020-06-27 Alabama 555 10
3 1005.0 Barbour 2020-06-27 Alabama 317 1
4 1007.0 Bibb 2020-06-27 Alabama 161 1
... ... ... ... ... ... ...
3038 56037.0 Sweetwater 2020-06-27 Wyoming 81 0
3039 56039.0 Teton 2020-06-27 Wyoming 119 1
3040 56041.0 Uinta 2020-06-27 Wyoming 167 0
3041 56043.0 Washakie 2020-06-27 Wyoming 38 5
3042 56045.0 Weston 2020-06-27 Wyoming 1 0

3043 rows × 6 columns

Now we have to make the changes in our shapefile too. For that we need to **dissolve** the 5 buroughs into one single geospatial entity.
#New York City fips = 36005', '36047', '36061', '36081', '36085 which corresponds to New York, Kings, Queens, Bronx and Richmond
spatial_nyc = county[county['GEOID'].isin(['36005', '36047', '36061', '36081', '36085'])]
combined_nyc = spatial_nyc.dissolve(by='STATEFP')
alt.Chart(spatial_nyc).mark_geoshape(stroke='white', strokeWidth=3).encode() | alt.Chart(combined_nyc).mark_geoshape(stroke='white', strokeWidth=3).encode()
agg_nyc_data = spatial_nyc.dissolve(by='STATEFP').reset_index()
agg_nyc_data['GEOID'] = '1'
agg_nyc_data['fips'] = 1
agg_nyc_data['lon'] = agg_nyc_data['geometry'].centroid.x
agg_nyc_data['lat'] = agg_nyc_data['geometry'].centroid.y
agg_nyc_data
STATEFP geometry COUNTYFP COUNTYNS AFFGEOID GEOID NAME LSAD ALAND AWATER lon lat fips
0 36 POLYGON ((-74.24921 40.54506, -74.21684 40.558... 061 00974129 0500000US36061 1 New York 06 58690498 28541727 -73.927011 40.695278 1
# hide_output
county_nyc = gpd.GeoDataFrame(pd.concat([county, agg_nyc_data], ignore_index=True))
county_nyc['fips'] = county_nyc['GEOID']
county_nyc['fips'] = county_nyc['fips'].astype('int')
county_nyc
# generate FIPS in the shapefile itself by combining STATEFP and COUNTYFP
#county2['STATEFP'] + county2['COUNTYFP']
#latest_cases['fips'] = latest_cases['fips'].astype('int')
latest_cases['fips'].isin(county_nyc['fips']).value_counts()
True    3043
Name: fips, dtype: int64
latest_cases[latest_cases['county'] == 'New York City']
fips county date state cases deaths
0 1.0 New York City 2020-06-27 New York 219157 21913
county_nyc[county_nyc['fips'] == 1]
STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME LSAD ALAND AWATER geometry lon lat fips
3220 36 061 00974129 0500000US36061 1 New York 06 58690498 28541727 POLYGON ((-74.24921 40.54506, -74.21684 40.558... -73.927011 40.695278 1

# collapse
latest_cases_w_fips = county_nyc.merge(latest_cases, how='left', on='fips')

circle_selection = alt.selection_single(on='mouseover', empty='none')

circles = alt.Chart(latest_cases_w_fips).mark_point(fillOpacity=0.2, fill='red', strokeOpacity=1, color='red', strokeWidth=1).encode(
    latitude="lat:Q",
    longitude="lon:Q",
    size=alt.Size('cases:Q', scale=alt.Scale(domain=[0, 7000],),legend=alt.Legend(title="Cases")),
    tooltip=['county:N', 'cases:Q', 'deaths:Q'],
    color = alt.condition(circle_selection, alt.value('black'), alt.value('red'))
).project(
    type='albersUsa'
).properties(
    width=1000,
    height=700
).add_selection(
    circle_selection
)

state = alt.Chart(states).mark_geoshape(fill='#ededed', stroke='white').encode(
).project(
    type='albersUsa'
)

state_text = state.mark_text().transform_filter(alt.datum.NAME != 'Puerto Rico').encode(
    longitude='lon:Q',
    latitude='lat:Q',
    text='NAME',
).project(
    type='albersUsa'
)

(state+circles+state_text).configure_view(strokeWidth=0)