Employee

Mon 30 June 2025

pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from seaborn) (2.3.1)
Requirement already satisfied: pandas>=1.2 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from seaborn) (2.3.0)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from seaborn) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.2)
Requirement already satisfied: cycler>=0.10 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.58.4)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.8)
Requirement already satisfied: packaging>=20.0 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.2)
Requirement already satisfied: pillow>=8 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.2.1)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.3)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from pandas>=1.2->seaborn) (2025.2)
Requirement already satisfied: six>=1.5 in c:\users\hp\miniconda3\envs\py312\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Enable inline plotting
%matplotlib inline

data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva", "Frank", "Grace", "Henry"],
    "Age": [25, 30, np.nan, 45, 28, 33, 38, np.nan],
    "Salary": [50000, 60000, 55000, 65000, np.nan, 70000, 62000, 58000],
    "Department": ["HR", "Finance", "IT", "Finance", "HR", "IT", "IT", "HR"]
}
df = pd.DataFrame(data)
df

	Name	Age	Salary	Department
0	Alice	25.0	50000.0	HR
1	Bob	30.0	60000.0	Finance
2	Charlie	NaN	55000.0	IT
3	David	45.0	65000.0	Finance
4	Eva	28.0	NaN	HR
5	Frank	33.0	70000.0	IT
6	Grace	38.0	62000.0	IT
7	Henry	NaN	58000.0	HR

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        8 non-null      object 
 1   Age         6 non-null      float64
 2   Salary      7 non-null      float64
 3   Department  8 non-null      object 
dtypes: float64(2), object(2)
memory usage: 388.0+ bytes

df.isnull().sum()

Name          0
Age           2
Salary        1
Department    0
dtype: int64

df['Age'] = df['Age'].fillna(df['Age'].median())
df

	Name	Age	Salary	Department
0	Alice	25.0	50000.0	HR
1	Bob	30.0	60000.0	Finance
2	Charlie	31.5	55000.0	IT
3	David	45.0	65000.0	Finance
4	Eva	28.0	NaN	HR
5	Frank	33.0	70000.0	IT
6	Grace	38.0	62000.0	IT
7	Henry	31.5	58000.0	HR

df['Salary'] = df['Salary'].fillna(df['Salary'].mean())
df

	Name	Age	Salary	Department
0	Alice	25.0	50000.0	HR
1	Bob	30.0	60000.0	Finance
2	Charlie	31.5	55000.0	IT
3	David	45.0	65000.0	Finance
4	Eva	28.0	60000.0	HR
5	Frank	33.0	70000.0	IT
6	Grace	38.0	62000.0	IT
7	Henry	31.5	58000.0	HR

df.describe()

	Age	Salary
count	8.00000	8.000000
mean	32.75000	60000.000000
std	6.22208	6071.008389
min	25.00000	50000.000000
25%	29.50000	57250.000000
50%	31.50000	60000.000000
75%	34.25000	62750.000000
max	45.00000	70000.000000

sns.histplot(df['Age'], kde=True, bins=5, color='skyblue')
plt.title("Age Distribution")
plt.xlabel("Age")
plt.ylabel("Count")
plt.show()

png

sns.countplot(x="Department", data=df, palette="viridis")
plt.title("Department Count")
plt.show()

C:\Users\HP\AppData\Local\Temp\ipykernel_1960\1748937494.py:1: FutureWarning:

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x="Department", data=df, palette="viridis")

png

sns.boxplot(x="Department", y="Salary", data=df)
plt.title("Salary Distribution by Department")
plt.show()

png

df['Experience'] = [2, 5, 3, 10, 4, 8, 6, 7]
df

	Name	Age	Salary	Department	Experience
0	Alice	25.0	50000.0	HR	2
1	Bob	30.0	60000.0	Finance	5
2	Charlie	31.5	55000.0	IT	3
3	David	45.0	65000.0	Finance	10
4	Eva	28.0	60000.0	HR	4
5	Frank	33.0	70000.0	IT	8
6	Grace	38.0	62000.0	IT	6
7	Henry	31.5	58000.0	HR	7

df.corr(numeric_only=True)

	Age	Salary	Experience
Age	1.000000	0.606989	0.819292
Salary	0.606989	1.000000	0.819845
Experience	0.819292	0.819845	1.000000

df.corr(numeric_only=True)

	Age	Salary	Experience
Age	1.000000	0.606989	0.819292
Salary	0.606989	1.000000	0.819845
Experience	0.819292	0.819845	1.000000

sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

png

df[df['Salary'] > 60000]

	Name	Age	Salary	Department	Experience
3	David	45.0	65000.0	Finance	10
5	Frank	33.0	70000.0	IT	8
6	Grace	38.0	62000.0	IT	6

df.groupby('Department')['Salary'].mean()

Department
Finance    62500.000000
HR         56000.000000
IT         62333.333333
Name: Salary, dtype: float64

plt.scatter(df['Age'], df['Salary'], color='green')
plt.xlabel('Age')
plt.ylabel('Salary')
plt.title('Age vs Salary')
plt.grid(True)
plt.show()

png

df['Department'].value_counts().plot.pie(autopct='%1.1f%%', startangle=90)
plt.title('Department Distribution')
plt.ylabel('')
plt.show()

png

sns.lineplot(x='Experience', y='Salary', data=df, marker='o')
plt.title("Salary vs Experience")
plt.show()

png

df['Bonus'] = df['Salary'] * 0.1
df

	Name	Age	Salary	Department	Experience	Bonus
0	Alice	25.0	50000.0	HR	2	5000.0
1	Bob	30.0	60000.0	Finance	5	6000.0
2	Charlie	31.5	55000.0	IT	3	5500.0
3	David	45.0	65000.0	Finance	10	6500.0
4	Eva	28.0	60000.0	HR	4	6000.0
5	Frank	33.0	70000.0	IT	8	7000.0
6	Grace	38.0	62000.0	IT	6	6200.0
7	Henry	31.5	58000.0	HR	7	5800.0

df.head()

	Name	Age	Salary	Department	Experience	Bonus
0	Alice	25.0	50000.0	HR	2	5000.0
1	Bob	30.0	60000.0	Finance	5	6000.0
2	Charlie	31.5	55000.0	IT	3	5500.0
3	David	45.0	65000.0	Finance	10	6500.0
4	Eva	28.0	60000.0	HR	4	6000.0

df.to_csv("cleaned_employee_data.csv", index=False)
print("Saved to cleaned_employee_data.csv")

Saved to cleaned_employee_data.csv

Score: 20

Category: basics