Today we will make the chart shown above, which appeared in the following reports on the slowdown of job growth in various industries -

Tracking down the data used by the articles was tedious, but I managed it anyway. Here's how I did it -

  1. The main cited source is the Bureau of Labor Statistics.
  2. Painfully going through the website, I couldn't make much sense of everything that was there. Too much info.
  3. Google Search - BLS employment by industry data
  4. Opened this link and thought it looked interesting - https://www.bls.gov/charts/employment-situation/employment-levels-by-industry.htm#
  5. We now know that we need Employment by Industry data.
  6. Let's try the API - https://www.bls.gov/data/#api -> https://www.bls.gov/developers/
  7. So we need the Series IDs for the tables.
  8. Data Tools -> Series Report -> Series ID Formats -> Employment & Unemployment -> National Employment, Hours, and Earnings -> That has all the information about how to construct the query (see the sketch after this list).
  9. The queried answer matches exactly the Show Table result from the 3rd step!
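
Based on that Series ID Formats page, a national CES series ID seems to break down into a prefix ('CES' for seasonally adjusted, 'CEU' for not seasonally adjusted), an 8-digit industry code, and a 2-digit data type code ('01' = all employees, in thousands). Here is a minimal sketch of assembling the six IDs used below - the helper and the dict of supersector codes are my own reconstruction, not copied verbatim from BLS documentation -

# Sketch: assemble CES series IDs from their parts.
# Assumed format: prefix + 8-digit industry code + 2-digit data type ('01' = all employees, thousands).
supersectors = {
    'Construction': '20000000',
    'Manufacturing': '30000000',
    'Retail trade': '42000000',
    'Professional and business services': '60000000',
    'Education and health services': '65000000',
    'Leisure and hospitality': '70000000',
}

def ces_series_id(industry_code, data_type='01', seasonally_adjusted=True):
    prefix = 'CES' if seasonally_adjusted else 'CEU'
    return f"{prefix}{industry_code}{data_type}"

series_ids = [ces_series_id(code) for code in supersectors.values()]
# ['CES2000000001', 'CES3000000001', 'CES4200000001', 'CES6000000001', 'CES6500000001', 'CES7000000001']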
import altair as alt
import requests
import pandas as pd

alt.renderers.set_embed_options(actions=False)

uri = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
# Request headers - the BLS API expects a JSON payload.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
    'Content-type': 'application/json',
}
r = requests.post(uri, data='{"seriesid":["CES6500000001", "CES6000000001", "CES3000000001", "CES7000000001", "CES4200000001", "CES2000000001"], "startyear":"2016", "endyear":"2020"}', headers=headers)
# Flatten the JSON response: for each series, stack its monthly records (reversed into
# chronological order) into a DataFrame tagged with the series ID, then concatenate all series.
data = pd.concat([pd.DataFrame({'type': x['seriesID'], **(pd.concat([pd.Series(y).to_frame().T for y in x['data']])[::-1].to_dict(orient='list'))}) for x in r.json()['Results']['series']])
data = data.reset_index(drop=True)
data.head()
type year period periodName latest value footnotes calculations
0 CES6500000001 2016 M01 January NaN 22337 [{}] NaN
1 CES6500000001 2016 M02 February NaN 22405 [{}] NaN
2 CES6500000001 2016 M03 March NaN 22452 [{}] NaN
3 CES6500000001 2016 M04 April NaN 22510 [{}] NaN
4 CES6500000001 2016 M05 May NaN 22567 [{}] NaN
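The one-liner above crams a lot into a single expression. An equivalent, more readable version (a sketch assuming the same response shape, where each series carries its monthly records in reverse chronological order) would produce the same table, up to column order -

frames = []
for series in r.json()['Results']['series']:
    # Each record is a flat dict (year, period, periodName, value, ...);
    # reverse so rows run in chronological order, then tag with the series ID.
    frame = pd.DataFrame(series['data'][::-1])
    frame.insert(0, 'type', series['seriesID'])
    frames.append(frame)
data = pd.concat(frames, ignore_index=True)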
data['value'] = data['value'].astype(float)
data['time'] = pd.to_datetime(data['year']+data['periodName'], format="%Y%B")
# Month-over-month change and cumulative change since the first month, per series.
data = data.assign(change = data.groupby('type')['value'].diff())
data = data.assign(cumulative_change = data.groupby('type')['value'].transform(lambda x: x - x.iloc[0]))
data.head()
type year period periodName latest value footnotes calculations time change cumulative_change
0 CES6500000001 2016 M01 January NaN 22337.0 [{}] NaN 2016-01-01 NaN 0.0
1 CES6500000001 2016 M02 February NaN 22405.0 [{}] NaN 2016-02-01 68.0 68.0
2 CES6500000001 2016 M03 March NaN 22452.0 [{}] NaN 2016-03-01 47.0 115.0
3 CES6500000001 2016 M04 April NaN 22510.0 [{}] NaN 2016-04-01 58.0 173.0
4 CES6500000001 2016 M05 May NaN 22567.0 [{}] NaN 2016-05-01 57.0 230.0
# Map CES series IDs to readable industry names.
industry_names = {
    'CES2000000001': 'Construction',
    'CES3000000001': 'Manufacturing',
    'CES4200000001': 'Retail',
    'CES6000000001': 'Business and professional services',
    'CES6500000001': 'Education and health',
    'CES7000000001': 'Leisure and hospitality',
}
data['type'] = data['type'].map(industry_names)
plot_data = data.copy()
# For each industry, the latest value minus the February 2020 value, mapped back
# onto every row so it can be used as a text label later.
plot_data = plot_data.assign(since_feb = plot_data['type'].map(plot_data.groupby('type').apply(lambda x: int(x['value'].iloc[-1] - x[x['time'] == '2020-02-01']['value']))))
# Jobs lost since February 2020, per industry (in thousands):
plot_data.groupby('type').apply(lambda x: int(x[x['time'] == '2020-02-01']['value'] - x['value'].iloc[-1])).reset_index(level=0,drop=True)
0    1386
1     394
2    1397
3    3840
4     647
5     483
dtype: int64
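The same numbers can be obtained a little more directly - a sketch using the same plot_data, taking the February 2020 level minus the latest level per industry -

feb_2020 = plot_data[plot_data['time'] == '2020-02-01'].set_index('type')['value']
latest = plot_data.sort_values('time').groupby('type')['value'].last()
(feb_2020 - latest).astype(int)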
# plot_data[plot_data['type'] == 'Education and health']
plot_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 342 entries, 0 to 341
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   type               342 non-null    object        
 1   year               342 non-null    object        
 2   period             342 non-null    object        
 3   periodName         342 non-null    object        
 4   latest             6 non-null      object        
 5   value              342 non-null    float64       
 6   footnotes          342 non-null    object        
 7   calculations       90 non-null     object        
 8   time               342 non-null    datetime64[ns]
 9   change             336 non-null    float64       
 10  cumulative_change  342 non-null    float64       
 11  since_feb          342 non-null    int64         
dtypes: datetime64[ns](1), float64(3), int64(1), object(7)
memory usage: 32.2+ KB

We will use the step-after interpolation method. You can read more about what the various interpolation methods mean at https://github.com/d3/d3-shape/blob/master/README.md#curves
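
As a quick standalone illustration (toy data, not part of the main chart), here is what step-after looks like next to the default linear interpolation -

toy = pd.DataFrame({'x': [0, 1, 2, 3], 'y': [0, 2, 1, 3]})
linear = alt.Chart(toy, title='linear').mark_line(point=True).encode(x='x:Q', y='y:Q')
stepped = alt.Chart(toy, title='step-after').mark_line(point=True, interpolate='step-after').encode(x='x:Q', y='y:Q')
linear | stepped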

alt.Chart(data, height=200, title="Some industries are approaching pre-pandemic employment, but leisure and hospitality jobs are lagging far behind").mark_area(line=True, interpolate='step-after',).encode(
    x= alt.X('time', title=None),
    y= alt.Y('cumulative_change:Q', title=None, scale=alt.Scale(domain=[-3000, 3000])),
#     row = alt.Row('type:N', rows=3)
    facet = alt.Facet('type:N', columns=2, spacing={'row': -200}, 
                      sort=['Construction', 'Education and health', 'Business and professional services', 'Manufacturing', 'Retail', 'Leisure and hospitality'],
                      title="Cumulative change in jobs since August 2016, by industry"
                     )
).resolve_axis(y='independent', x='independent')

Let's try colouring the data from February 2020 onwards differently -

alt.Chart(data, height=200, title="Some industries are approaching pre-pandemic employment, but leisure and hospitality jobs are lagging far behind"
         ).mark_area(
    line=True, 
    interpolate='step-after'
    ).transform_calculate(
    recent = alt.datum.time > alt.expr.toDate('2020-01-31')
).encode(
    x= alt.X('time', title=None),
    y= alt.Y('cumulative_change:Q', title=None, scale=alt.Scale(domain=[-3000, 3000])),
    color = 'recent:N',
    stroke = 'recent:N',
#     row = alt.Row('type:N', rows=3)
    facet = alt.Facet('type:N', columns=2, spacing={'row': -200}, 
                      sort=['Construction', 'Education and health', 'Business and professional services', 'Manufacturing', 'Retail', 'Leisure and hospitality'],
                      title="Cumulative change in jobs since August 2016, by industry"
                     )
).resolve_axis(y='independent', x='independent')
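
The recent flag could just as well be precomputed in pandas instead of with transform_calculate - a one-line sketch:

data['recent'] = data['time'] >= '2020-02-01'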

The two coloured segments overlap each other. Let's try a different approach - building the area and the line as separate layers -

base = alt.Chart(height=200).transform_calculate(
    recent = alt.datum.time >= alt.expr.toDate('2020-01-01')
).encode(
    x= alt.X('time:T', title=None),
    y= alt.Y('cumulative_change:Q', title=None, scale=alt.Scale(domain=[-3000, 3000])),
    tooltip=['time']
)
    
area = base.mark_area(interpolate='step-after', fillOpacity=0.6).encode(color = 'recent:N')

line = base.mark_line(interpolate='step-after', color="blue").encode(color = 'recent:N')
    
alt.layer(area,line, data=plot_data).facet(alt.Facet('type:N', 
                      sort=['Construction', 'Education and health', 'Business and professional services', 'Manufacturing', 'Retail', 'Leisure and hospitality'],
                      title="Cumulative change in jobs since August 2016, by industry"
                     )
).resolve_axis(y='independent', x='independent').configure_facet(columns=2).properties(title="Some industries are approaching pre-pandemic employment, but leisure and hospitality jobs are lagging far behind")
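
Note that the data is passed to alt.layer(...) rather than to the base chart: to facet a layered chart, Altair needs the data attached at the top level so the facet operator can partition it across the layers.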

That was better. Let's make it aesthetically pleasing -

base = alt.Chart(height=200).transform_calculate(
    recent = alt.datum.time >= alt.expr.toDate('2020-01-01')
).encode(
    x= alt.X('time:T', title=None, axis=alt.Axis(format="%y", domainDash=[2,2.5], domainWidth=1.5, tickCount=5, domain=False, labelPadding=1)),
    y= alt.Y('cumulative_change:Q', title=None, axis=alt.Axis(domain=False), scale=alt.Scale(domain=[-2000, 2000])),
)
    
area = base.mark_area(interpolate='step-after', fillOpacity=0.1).encode(color = alt.Color('recent:N', legend=None))

line = base.mark_line(interpolate='step-after', strokeCap="round").encode(color = alt.Color('recent:N', scale=alt.Scale(domain=['false', 'true'], range=['black', 'rgb(22, 174, 205)'])))

# A dot at the latest month: the argmax aggregate picks the cumulative_change
# value at the row where time is largest within each facet.
dot = base.mark_circle(color='rgb(22, 174, 205)', size=50).encode(
    x='max(time)',
    y=alt.Y('cumulative_change:Q', aggregate={'argmax': 'time'}),
)

# A text label next to the dot, showing the since_feb value at the latest month.
text = base.mark_text(align='left', dx=10).encode(
    x='max(time)',
    y=alt.Y('cumulative_change:Q', aggregate={'argmax': 'time'}),
    text=alt.Text('since_feb:Q', aggregate={'argmax': 'time'})
)

# Dashed horizontal reference line at zero.
h_rule = alt.Chart(pd.DataFrame({'zero': [0]})).mark_rule(strokeDash=[2,2]).encode(y='zero:Q')

alt.layer(area,line,dot,text, h_rule, data=plot_data).facet(facet=alt.Facet('type:N', 
                      sort=['Construction', 'Education and health', 'Business and professional services', 'Manufacturing', 'Retail', 'Leisure and hospitality'],
                      header=alt.Header(title="Cumulative change in jobs since August 2016, by industry", titleOrient="top", titleAnchor='start', titleFontSize=15, titleColor='grey')
                     ), spacing={"row": -100}
).resolve_axis(y='independent', x='independent').configure_facet(columns=2).configure_axis(grid=False).configure_view(stroke=None).properties(title="Some industries are approaching pre-pandemic employment, but leisure and hospitality jobs are lagging far behind").configure_title(fontSize=17)

We cannot completely fix the row spacing because of the last chart - every facet sub-chart gets the area of the sub-chart with the maximum area.
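
One possible workaround (an untested sketch, not what the original article did) is to drop the facet operator and build one chart per industry, laying them out with alt.concat so each sub-chart's size is controlled explicitly -

industries = ['Construction', 'Education and health', 'Business and professional services',
              'Manufacturing', 'Retail', 'Leisure and hospitality']
charts = [
    alt.Chart(plot_data[plot_data['type'] == ind], height=120, width=300, title=ind)
       .mark_area(line=True, interpolate='step-after')
       .encode(x=alt.X('time:T', title=None),
               y=alt.Y('cumulative_change:Q', title=None, scale=alt.Scale(domain=[-2000, 2000])))
    for ind in industries
]
alt.concat(*charts, columns=2)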