Covid

Mon 30 June 2025
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "darkgrid" theme to every plot drawn below. `set_theme` is
# the preferred interface; `sns.set` is only an alias for it.
sns.set_theme(style="darkgrid")
# Hard-coded per-country COVID-19 figures, written one record per country so
# each row can be read (and checked) on its own.
_COLUMNS = ('Country', 'TotalCases', 'TotalDeaths', 'TotalRecovered', 'Population')
_RECORDS = [
    ('USA',    34_000_000, 610_000, 28_000_000,   331_000_000),
    ('India',  31_000_000, 410_000, 30_000_000, 1_380_000_000),
    ('Brazil', 19_000_000, 530_000, 17_000_000,   213_000_000),
    ('Russia',  7_000_000, 190_000,  6_500_000,   146_000_000),
    ('UK',      5_000_000, 130_000,  4_800_000,    68_000_000),
    ('France',  5_800_000, 110_000,  5_000_000,    67_000_000),
    ('Spain',   3_700_000,  81_000,  3_500_000,    47_000_000),
    ('Italy',   4_200_000, 127_000,  3_900_000,    60_000_000),
]

# Pivot the records into a column-name -> list-of-values mapping, then build
# the frame from it (columns keep the order given in _COLUMNS).
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_RECORDS))}
df = pd.DataFrame(data)
df
Country TotalCases TotalDeaths TotalRecovered Population
0 USA 34000000 610000 28000000 331000000
1 India 31000000 410000 30000000 1380000000
2 Brazil 19000000 530000 17000000 213000000
3 Russia 7000000 190000 6500000 146000000
4 UK 5000000 130000 4800000 68000000
5 France 5800000 110000 5000000 67000000
6 Spain 3700000 81000 3500000 47000000
7 Italy 4200000 127000 3900000 60000000
# Count missing values per column before deriving any metrics.
df.isna().sum()
Country           0
TotalCases        0
TotalDeaths       0
TotalRecovered    0
Population        0
dtype: int64
# Active cases: whatever is left of the total once recoveries and deaths
# have been taken out.
df['ActiveCases'] = (
    df['TotalCases']
    .sub(df['TotalRecovered'])
    .sub(df['TotalDeaths'])
)
df
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases
0 USA 34000000 610000 28000000 331000000 5390000
1 India 31000000 410000 30000000 1380000000 590000
2 Brazil 19000000 530000 17000000 213000000 1470000
3 Russia 7000000 190000 6500000 146000000 310000
4 UK 5000000 130000 4800000 68000000 70000
5 France 5800000 110000 5000000 67000000 690000
6 Spain 3700000 81000 3500000 47000000 119000
7 Italy 4200000 127000 3900000 60000000 173000
# Deaths as a percentage of total cases, kept to two decimal places.
df['MortalityRate'] = (
    df['TotalDeaths']
    .div(df['TotalCases'])
    .mul(100)
    .round(2)
)
df
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate
0 USA 34000000 610000 28000000 331000000 5390000 1.79
1 India 31000000 410000 30000000 1380000000 590000 1.32
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79
3 Russia 7000000 190000 6500000 146000000 310000 2.71
4 UK 5000000 130000 4800000 68000000 70000 2.60
5 France 5800000 110000 5000000 67000000 690000 1.90
6 Spain 3700000 81000 3500000 47000000 119000 2.19
7 Italy 4200000 127000 3900000 60000000 173000 3.02
# Recoveries as a percentage of total cases, kept to two decimal places.
df['RecoveryRate'] = (
    df['TotalRecovered']
    .div(df['TotalCases'])
    .mul(100)
    .round(2)
)
df
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate
0 USA 34000000 610000 28000000 331000000 5390000 1.79 82.35
1 India 31000000 410000 30000000 1380000000 590000 1.32 96.77
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79 89.47
3 Russia 7000000 190000 6500000 146000000 310000 2.71 92.86
4 UK 5000000 130000 4800000 68000000 70000 2.60 96.00
5 France 5800000 110000 5000000 67000000 690000 1.90 86.21
6 Spain 3700000 81000 3500000 47000000 119000 2.19 94.59
7 Italy 4200000 127000 3900000 60000000 173000 3.02 92.86
# Cases per 100,000 inhabitants, so countries of different size can be
# compared; kept to two decimal places.
df['CasesPer100k'] = (
    df['TotalCases']
    .div(df['Population'])
    .mul(100000)
    .round(2)
)
df
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate CasesPer100k
0 USA 34000000 610000 28000000 331000000 5390000 1.79 82.35 10271.90
1 India 31000000 410000 30000000 1380000000 590000 1.32 96.77 2246.38
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79 89.47 8920.19
3 Russia 7000000 190000 6500000 146000000 310000 2.71 92.86 4794.52
4 UK 5000000 130000 4800000 68000000 70000 2.60 96.00 7352.94
5 France 5800000 110000 5000000 67000000 690000 1.90 86.21 8656.72
6 Spain 3700000 81000 3500000 47000000 119000 2.19 94.59 7872.34
7 Italy 4200000 127000 3900000 60000000 173000 3.02 92.86 7000.00
# Show the countries ordered from most to fewest total cases. The result is
# only displayed; `df` itself keeps its original row order.
df.sort_values(by='TotalCases', ascending=False)
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate CasesPer100k
0 USA 34000000 610000 28000000 331000000 5390000 1.79 82.35 10271.90
1 India 31000000 410000 30000000 1380000000 590000 1.32 96.77 2246.38
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79 89.47 8920.19
3 Russia 7000000 190000 6500000 146000000 310000 2.71 92.86 4794.52
5 France 5800000 110000 5000000 67000000 690000 1.90 86.21 8656.72
4 UK 5000000 130000 4800000 68000000 70000 2.60 96.00 7352.94
7 Italy 4200000 127000 3900000 60000000 173000 3.02 92.86 7000.00
6 Spain 3700000 81000 3500000 47000000 119000 2.19 94.59 7872.34
# Bar chart: total cases for each country.
plt.figure(figsize=(10, 5))
sns.barplot(
    data=df,
    x="Country",
    y="TotalCases",
)
plt.title("Total COVID-19 Cases by Country")
plt.xticks(rotation=45)  # tilt the country labels
plt.show()

png

# Pie chart: each country's share of the deaths summed over the table.
plt.figure(figsize=(6, 6))
plt.pie(
    df['TotalDeaths'],
    labels=df['Country'],
    autopct='%1.1f%%',  # print each slice's percentage with one decimal
    startangle=140,
)
plt.title("Share of Total Deaths")
plt.show()

png

# Scatter plot: recovery rate against mortality rate, one coloured point
# per country.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x="RecoveryRate",
    y="MortalityRate",
    hue="Country",
    s=100,
)
plt.title("Recovery vs Mortality Rate")
plt.show()

png

# Pairwise Pearson correlation between the numeric columns; non-numeric
# columns such as Country are left out.
correlation = df.corr(method='pearson', numeric_only=True)
correlation
TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate CasesPer100k
TotalCases 1.000000 0.912920 0.993700 0.716484 0.735629 -0.630630 -0.350647 -0.070176
TotalDeaths 0.912920 1.000000 0.885787 0.456123 0.785250 -0.319051 -0.481419 0.170433
TotalRecovered 0.993700 0.885787 1.000000 0.786761 0.655228 -0.650646 -0.258257 -0.167963
Population 0.716484 0.456123 0.786761 1.000000 0.084207 -0.702345 0.279695 -0.688266
ActiveCases 0.735629 0.785250 0.655228 0.084207 1.000000 -0.344567 -0.801032 0.533024
MortalityRate -0.630630 -0.319051 -0.650646 -0.702345 -0.344567 1.000000 0.153110 0.228256
RecoveryRate -0.350647 -0.481419 -0.258257 0.279695 -0.801032 0.153110 1.000000 -0.739556
CasesPer100k -0.070176 0.170433 -0.167963 -0.688266 0.533024 0.228256 -0.739556 1.000000
# Heatmap of the correlation matrix, with each coefficient written in its cell.
plt.figure(figsize=(10, 6))
sns.heatmap(
    correlation,
    cmap="Blues",
    annot=True,
)
plt.title("Feature Correlation Matrix")
plt.show()

png

# The five countries with the highest mortality rate, highest first.
df.nlargest(5, 'MortalityRate')
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate CasesPer100k
7 Italy 4200000 127000 3900000 60000000 173000 3.02 92.86 7000.00
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79 89.47 8920.19
3 Russia 7000000 190000 6500000 146000000 310000 2.71 92.86 4794.52
4 UK 5000000 130000 4800000 68000000 70000 2.60 96.00 7352.94
6 Spain 3700000 81000 3500000 47000000 119000 2.19 94.59 7872.34
# Column sums of the three headline counts across every country in the table.
totals = df[
    ['TotalCases', 'TotalDeaths', 'TotalRecovered']
].sum(axis=0)
totals
TotalCases        109700000
TotalDeaths         2188000
TotalRecovered     98700000
dtype: int64
# Pooled mortality rate (%): summed deaths over summed cases. "Global" here
# means across the rows of this table only, since `totals` is summed from `df`.
global_mortality = totals.loc['TotalDeaths'] / totals.loc['TotalCases'] * 100
round(global_mortality, 2)
np.float64(1.99)
# Label each country by mortality rate: strictly above 2 (%) is 'High',
# everything else is 'Moderate'. Done with a vectorized boolean mask rather
# than a per-row lambda; the resulting labels are the same.
df['RiskLevel'] = 'Moderate'
df.loc[df['MortalityRate'] > 2, 'RiskLevel'] = 'High'
df
Country TotalCases TotalDeaths TotalRecovered Population ActiveCases MortalityRate RecoveryRate CasesPer100k RiskLevel
0 USA 34000000 610000 28000000 331000000 5390000 1.79 82.35 10271.90 Moderate
1 India 31000000 410000 30000000 1380000000 590000 1.32 96.77 2246.38 Moderate
2 Brazil 19000000 530000 17000000 213000000 1470000 2.79 89.47 8920.19 High
3 Russia 7000000 190000 6500000 146000000 310000 2.71 92.86 4794.52 High
4 UK 5000000 130000 4800000 68000000 70000 2.60 96.00 7352.94 High
5 France 5800000 110000 5000000 67000000 690000 1.90 86.21 8656.72 Moderate
6 Spain 3700000 81000 3500000 47000000 119000 2.19 94.59 7872.34 High
7 Italy 4200000 127000 3900000 60000000 173000 3.02 92.86 7000.00 High
# Bar chart: how many countries fall into each risk level.
sns.countplot(
    data=df,
    x="RiskLevel",
)
plt.title("Country Risk Level Count")
plt.show()

png



Score: 15

Category: basics