# Source: https://github.com/cwerner/covid19

# 1. Source data directly from GitHub (JHU COVID)
# 2. Configurable UI based on two variables - inhabitants and countries
# 3. Use Altair (https://altair-viz.github.io/) declarative statistical visualization charts

import datetime
from functools import reduce

import streamlit as st
from streamlit import caching
import pandas as pd
import altair as alt
import os

# numbers for 2019 (in millions: app() multiplies these by 1_000_000 and
# labels them "Inhabitants [mio]")
inhabitants = {
    'India': 1352.6,
    'US': 328.2,
    'Brazil': 209.5,
    'Russia': 144.5,
    'United Kingdom': 67.1,
    'China': 1392.7,
    'Italy': 60.23,
}


@st.cache
def read_data():
    """Download the JHU CSSE global time series and aggregate them per country.

    Returns:
        A ``(confirmed, deaths, recovered)`` tuple of DataFrames. Each frame
        has one row per "Country/Region" value, with that label restored as a
        regular column and every other column summed.
    """
    BASEURL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series"

    frames = []
    for kind in ("confirmed", "deaths", "recovered"):
        # The first CSV column becomes the index, so it takes no part in the
        # per-country sum below.
        raw = pd.read_csv(
            f"{BASEURL}/time_series_covid19_{kind}_global.csv", index_col=0
        )
        # sum over potentially duplicate rows (France and their territories)
        frames.append(raw.groupby("Country/Region").sum().reset_index())
    return tuple(frames)


def _melt_by_date(df, id_vars=None):
    """Melt *df* to long format and index the result by the parsed date.

    The labels of the melted columns are parsed with ``pd.to_datetime``.
    ``infer_datetime_format`` is intentionally not passed: it is deprecated
    since pandas 2.0, where strict format inference is already the default.
    """
    dfm = pd.melt(df, id_vars=id_vars)
    dfm["date"] = pd.to_datetime(dfm["variable"])
    return dfm.set_index("date")


def transform(df, collabel='confirmed'):
    """Turn a wide table with date-labelled columns into a long, date-indexed one.

    Args:
        df: DataFrame in which every column label is a date string
            (all columns are melted).
        collabel: Name given to the single value column of the result.

    Returns:
        DataFrame indexed by ``date`` with one column named *collabel*.
    """
    dfm = _melt_by_date(df)[["value"]]
    dfm.columns = [collabel]
    return dfm


def transform2(df, collabel='confirmed'):
    """Like ``transform``, but keep the country of every observation.

    Args:
        df: DataFrame with a "Country/Region" column; every other column
            label must be a date string.
        collabel: Name given to the value column of the result.

    Returns:
        DataFrame indexed by ``date`` with the columns ``country`` and
        *collabel*.
    """
    dfm = _melt_by_date(df, id_vars=["Country/Region"])[["Country/Region", "value"]]
    dfm.columns = ["country", collabel]
    return dfm


# NOTE(review): this source line also carried the opening of app(). Its body
# runs past the visible part of the file, so the fragment is kept verbatim
# below instead of being reformatted:
# def app(): st.title("🦠 Covid-19 Data Explorer") st.markdown("""\ This app illustrates the spread of COVID-19 in select countries over time.
""") #st.error("⚠️ There is currently an issue in the datasource of JHU. Data for 03/13 is invalid and thus removed!") countries = ["India", "US", "Russia", "Brazil", "China", "Italy", "United Kingdom"] analysis = st.sidebar.selectbox("Choose Analysis", ["Overview", "By Country"]) if analysis == "Overview": st.header("COVID-19 cases and fatality rate") st.markdown("""\ These are the reported case numbers for a selection of countries""" f""" (currently only {', '.join(countries)}). """ """The case fatality rate (CFR) is calculated as: $$ CFR[\%] = \\frac{fatalities}{\\textit{all cases}} $$ ℹ️ You can select/ deselect countries and switch between linear and log scales. """) confirmed, deaths, recovered = read_data() multiselection = st.multiselect("Select countries:", countries, default=countries) logscale = st.checkbox("Log scale", False) confirmed = confirmed[confirmed["Country/Region"].isin(multiselection)] confirmed = confirmed.drop(["Lat", "Long"],axis=1) confirmed = transform2(confirmed, collabel="confirmed") deaths = deaths[deaths["Country/Region"].isin(multiselection)] deaths = deaths.drop(["Lat", "Long"],axis=1) deaths = transform2(deaths, collabel="deaths") frate = confirmed[["country"]] frate["frate"] = (deaths.deaths / confirmed.confirmed)*100 # saveguard for empty selection if len(multiselection) == 0: return SCALE = alt.Scale(type='linear') if logscale: confirmed["confirmed"] += 0.00001 confirmed = confirmed[confirmed.index > '2020-02-16'] frate = frate[frate.index > '2020-02-16'] SCALE = alt.Scale(type='log', domain=[10, int(max(confirmed.confirmed))], clamp=True) c2 = alt.Chart(confirmed.reset_index()).properties(height=150).mark_line().encode( x=alt.X("date:T", title="Date"), y=alt.Y("confirmed:Q", title="Cases", scale=SCALE), color=alt.Color('country:N', title="Country") ) # case fatality rate... 
c3 = alt.Chart(frate.reset_index()).properties(height=100).mark_line().encode( x=alt.X("date:T", title="Date"), y=alt.Y("frate:Q", title="Fatality rate [%]", scale=alt.Scale(type='linear')), color=alt.Color('country:N', title="Country") ) per100k = confirmed.loc[[confirmed.index.max()]].copy() per100k.loc[:,'inhabitants'] = per100k.apply(lambda x: inhabitants[x['country']], axis=1) per100k.loc[:,'per100k'] = per100k.confirmed / (per100k.inhabitants * 1_000_000) * 100_000 per100k = per100k.set_index("country") per100k = per100k.sort_values(ascending=False, by='per100k') per100k.loc[:,'per100k'] = per100k.per100k.round(2) c4 = alt.Chart(per100k.reset_index()).properties(width=75).mark_bar().encode( x=alt.X("per100k:Q", title="Cases per 100k inhabitants"), y=alt.Y("country:N", title="Countries", sort=None), color=alt.Color('country:N', title="Country"), tooltip=[alt.Tooltip('country:N', title='Country'), alt.Tooltip('per100k:Q', title='Cases per 100k'), alt.Tooltip('inhabitants:Q', title='Inhabitants [mio]')] ) st.altair_chart(alt.hconcat(c4, alt.vconcat(c2, c3)), use_container_width=True) st.markdown(f"""\