import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the seaborn "darkgrid" theme to every plot drawn below.
# `sns.set` is only a legacy alias; `set_theme` is the supported entry point.
sns.set_theme(style="darkgrid")
# Hand-entered per-country figures, written one record per country so a row
# can be checked or edited in one place.
_columns = ('Country', 'TotalCases', 'TotalDeaths', 'TotalRecovered', 'Population')
_rows = (
    ('USA', 34_000_000, 610_000, 28_000_000, 331_000_000),
    ('India', 31_000_000, 410_000, 30_000_000, 1_380_000_000),
    ('Brazil', 19_000_000, 530_000, 17_000_000, 213_000_000),
    ('Russia', 7_000_000, 190_000, 6_500_000, 146_000_000),
    ('UK', 5_000_000, 130_000, 4_800_000, 68_000_000),
    ('France', 5_800_000, 110_000, 5_000_000, 67_000_000),
    ('Spain', 3_700_000, 81_000, 3_500_000, 47_000_000),
    ('Italy', 4_200_000, 127_000, 3_900_000, 60_000_000),
)

# Transpose the records into the column-oriented mapping the DataFrame is
# built from: one list of values per column name.
data = {name: list(values) for name, values in zip(_columns, zip(*_rows))}
df = pd.DataFrame(data)
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
Country 0
TotalCases 0
TotalDeaths 0
TotalRecovered 0
Population 0
dtype: int64
# Active cases are the cases not yet resolved: total minus the combined
# recovered and death counts.
df['ActiveCases'] = df['TotalCases'] - (df['TotalRecovered'] + df['TotalDeaths'])
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
# Deaths as a percentage of total cases, rounded to two decimals.
df['MortalityRate'] = df['TotalDeaths'].div(df['TotalCases']).mul(100).round(2)
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
1.79 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
1.32 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
1.90 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
# Recoveries as a percentage of total cases, rounded to two decimals.
df['RecoveryRate'] = df['TotalRecovered'].div(df['TotalCases']).mul(100).round(2)
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
1.79 |
82.35 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
1.32 |
96.77 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
89.47 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
92.86 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
96.00 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
1.90 |
86.21 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
94.59 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
92.86 |
# Total cases normalised by population and scaled to a per-100,000 figure,
# rounded to two decimals.
df['CasesPer100k'] = df['TotalCases'].div(df['Population']).mul(100_000).round(2)
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
CasesPer100k |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
1.79 |
82.35 |
10271.90 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
1.32 |
96.77 |
2246.38 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
89.47 |
8920.19 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
92.86 |
4794.52 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
96.00 |
7352.94 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
1.90 |
86.21 |
8656.72 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
94.59 |
7872.34 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
92.86 |
7000.00 |
# Show the countries ordered from most to fewest total cases. The result is
# only displayed, not assigned, so `df` keeps its original row order.
df.sort_values(by='TotalCases', ascending=False)
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
CasesPer100k |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
1.79 |
82.35 |
10271.90 |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
1.32 |
96.77 |
2246.38 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
89.47 |
8920.19 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
92.86 |
4794.52 |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
1.90 |
86.21 |
8656.72 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
96.00 |
7352.94 |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
92.86 |
7000.00 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
94.59 |
7872.34 |
# Bar chart of total cases, one bar per country.
plt.figure(figsize=(10, 5))
sns.barplot(data=df, x="Country", y="TotalCases").set_title(
    "Total COVID-19 Cases by Country"
)
# Tilt the country labels on the x axis.
plt.xticks(rotation=45)
plt.show()

# Pie chart of each country's share of the summed deaths in `df`.
plt.figure(figsize=(6, 6))
plt.pie(
    x=df['TotalDeaths'],
    labels=df['Country'],
    startangle=140,
    autopct='%1.1f%%',  # print each wedge's percentage with one decimal
)
plt.gca().set_title("Share of Total Deaths")
plt.show()

# Scatter of recovery rate (x) against mortality rate (y); each country gets
# its own colour through `hue`.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x="RecoveryRate",
    y="MortalityRate",
    hue="Country",
    s=100,
).set_title("Recovery vs Mortality Rate")
plt.show()

# Pairwise Pearson correlation between the numeric columns; non-numeric
# columns such as 'Country' are left out via numeric_only.
correlation = df.corr(method="pearson", numeric_only=True)
correlation
|
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
CasesPer100k |
| TotalCases |
1.000000 |
0.912920 |
0.993700 |
0.716484 |
0.735629 |
-0.630630 |
-0.350647 |
-0.070176 |
| TotalDeaths |
0.912920 |
1.000000 |
0.885787 |
0.456123 |
0.785250 |
-0.319051 |
-0.481419 |
0.170433 |
| TotalRecovered |
0.993700 |
0.885787 |
1.000000 |
0.786761 |
0.655228 |
-0.650646 |
-0.258257 |
-0.167963 |
| Population |
0.716484 |
0.456123 |
0.786761 |
1.000000 |
0.084207 |
-0.702345 |
0.279695 |
-0.688266 |
| ActiveCases |
0.735629 |
0.785250 |
0.655228 |
0.084207 |
1.000000 |
-0.344567 |
-0.801032 |
0.533024 |
| MortalityRate |
-0.630630 |
-0.319051 |
-0.650646 |
-0.702345 |
-0.344567 |
1.000000 |
0.153110 |
0.228256 |
| RecoveryRate |
-0.350647 |
-0.481419 |
-0.258257 |
0.279695 |
-0.801032 |
0.153110 |
1.000000 |
-0.739556 |
| CasesPer100k |
-0.070176 |
0.170433 |
-0.167963 |
-0.688266 |
0.533024 |
0.228256 |
-0.739556 |
1.000000 |
# Heatmap of the correlation matrix computed above.
# Correlation coefficients are signed, so use a diverging palette centred on
# zero with the colour scale pinned to [-1, 1]; a sequential palette with an
# auto-scaled range makes the sign and the zero point impossible to read.
plt.figure(figsize=(10, 6))
sns.heatmap(
    correlation,
    annot=True,
    fmt=".2f",  # annotate every cell with two decimals
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    center=0,
)
plt.title("Feature Correlation Matrix")
plt.show()

# Five rows with the highest mortality rate, highest first (display only).
df.sort_values(by='MortalityRate', ascending=False).iloc[:5]
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
CasesPer100k |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
92.86 |
7000.00 |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
89.47 |
8920.19 |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
92.86 |
4794.52 |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
96.00 |
7352.94 |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
94.59 |
7872.34 |
# Column-wise sums of the three count columns across all rows of `df`.
totals = df.loc[:, ['TotalCases', 'TotalDeaths', 'TotalRecovered']].sum(axis=0)
totals
TotalCases 109700000
TotalDeaths 2188000
TotalRecovered 98700000
dtype: int64
# Aggregate mortality percentage over the countries in `df`: summed deaths
# divided by summed cases, times 100.
global_mortality = totals.loc['TotalDeaths'] / totals.loc['TotalCases'] * 100
round(global_mortality, 2)
# Label each country by mortality rate: strictly above 2 is 'High',
# everything else is 'Moderate'.
df['RiskLevel'] = df['MortalityRate'].gt(2).map({True: 'High', False: 'Moderate'})
df
|
Country |
TotalCases |
TotalDeaths |
TotalRecovered |
Population |
ActiveCases |
MortalityRate |
RecoveryRate |
CasesPer100k |
RiskLevel |
| 0 |
USA |
34000000 |
610000 |
28000000 |
331000000 |
5390000 |
1.79 |
82.35 |
10271.90 |
Moderate |
| 1 |
India |
31000000 |
410000 |
30000000 |
1380000000 |
590000 |
1.32 |
96.77 |
2246.38 |
Moderate |
| 2 |
Brazil |
19000000 |
530000 |
17000000 |
213000000 |
1470000 |
2.79 |
89.47 |
8920.19 |
High |
| 3 |
Russia |
7000000 |
190000 |
6500000 |
146000000 |
310000 |
2.71 |
92.86 |
4794.52 |
High |
| 4 |
UK |
5000000 |
130000 |
4800000 |
68000000 |
70000 |
2.60 |
96.00 |
7352.94 |
High |
| 5 |
France |
5800000 |
110000 |
5000000 |
67000000 |
690000 |
1.90 |
86.21 |
8656.72 |
Moderate |
| 6 |
Spain |
3700000 |
81000 |
3500000 |
47000000 |
119000 |
2.19 |
94.59 |
7872.34 |
High |
| 7 |
Italy |
4200000 |
127000 |
3900000 |
60000000 |
173000 |
3.02 |
92.86 |
7000.00 |
High |
# Count of countries in each risk category.
sns.countplot(data=df, x="RiskLevel").set_title("Country Risk Level Count")
plt.show()

Score: 15