Ratings

Mon 30 June 2025
# 1. Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 2. Create a sample movie dataset
# Each record is (title, genre, year, rating, votes).
column_names = ("title", "genre", "year", "rating", "votes")
movie_rows = [
    ("Inception", "Sci-Fi", 2010, 8.8, 2000000),
    ("Titanic", "Romance", 1997, 7.8, 1100000),
    ("Avengers", "Action", 2012, 8.1, 1800000),
    ("Shrek", "Animation", 2001, 7.9, 600000),
    ("Up", "Animation", 2009, 8.3, 800000),
    ("Coco", "Animation", 2017, 8.4, 500000),
    ("Joker", "Drama", 2019, 8.5, 1000000),
    ("Frozen", "Animation", 2013, 7.5, 700000),
    ("Toy Story", "Animation", 1995, 8.3, 900000),
    ("Tenet", "Sci-Fi", 2020, 7.4, 400000),
]
# Transpose the records into a column-name -> list-of-values mapping,
# keeping the column order given in column_names.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*movie_rows))
}
# 3. Convert to DataFrame
df = pd.DataFrame(data)
df
title genre year rating votes
0 Inception Sci-Fi 2010 8.8 2000000
1 Titanic Romance 1997 7.8 1100000
2 Avengers Action 2012 8.1 1800000
3 Shrek Animation 2001 7.9 600000
4 Up Animation 2009 8.3 800000
5 Coco Animation 2017 8.4 500000
6 Joker Drama 2019 8.5 1000000
7 Frozen Animation 2013 7.5 700000
8 Toy Story Animation 1995 8.3 900000
9 Tenet Sci-Fi 2020 7.4 400000
# 4. Check data types and info
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   title   10 non-null     object 
 1   genre   10 non-null     object 
 2   year    10 non-null     int64  
 3   rating  10 non-null     float64
 4   votes   10 non-null     int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 532.0+ bytes
# 5. Basic statistics
df.describe()
year rating votes
count 10.00000 10.000000 1.000000e+01
mean 2009.30000 8.100000 9.800000e+05
std 8.90755 0.447214 5.329165e+05
min 1995.00000 7.400000 4.000000e+05
25% 2003.00000 7.825000 6.250000e+05
50% 2011.00000 8.200000 8.500000e+05
75% 2016.00000 8.375000 1.075000e+06
max 2020.00000 8.800000 2.000000e+06
# 6. Add a column for rating category
def rate_group(r):
    """Map a numeric rating to its descriptive category label.

    Bands use inclusive lower bounds: 8.5 and up is "Excellent", 8.0 and up
    is "Good", 7.5 and up is "Average"; anything else is "Below Average".
    """
    # Ordered from the highest cutoff down so the first match wins.
    bands = ((8.5, "Excellent"), (8.0, "Good"), (7.5, "Average"))
    for cutoff, label in bands:
        if r >= cutoff:
            return label
    # Reached when no cutoff is met, including values such as NaN that
    # compare False against every cutoff.
    return "Below Average"

df["rating_group"] = df["rating"].apply(rate_group)
# 7. Count of each rating group
df["rating_group"].value_counts()
rating_group
Good             4
Average          3
Excellent        2
Below Average    1
Name: count, dtype: int64
# 8. Count of each genre
df["genre"].value_counts()
genre
Animation    5
Sci-Fi       2
Romance      1
Action       1
Drama        1
Name: count, dtype: int64
# 9. Top rated movie
df[df["rating"] == df["rating"].max()]
title genre year rating votes rating_group
0 Inception Sci-Fi 2010 8.8 2000000 Excellent
# 10. Lowest rated movie
df[df["rating"] == df["rating"].min()]
title genre year rating votes rating_group
9 Tenet Sci-Fi 2020 7.4 400000 Below Average
# 11. Filter animation movies
df[df["genre"] == "Animation"]
title genre year rating votes rating_group
3 Shrek Animation 2001 7.9 600000 Average
4 Up Animation 2009 8.3 800000 Good
5 Coco Animation 2017 8.4 500000 Good
7 Frozen Animation 2013 7.5 700000 Average
8 Toy Story Animation 1995 8.3 900000 Good
# 12. Movies released after 2010
df[df["year"] > 2010]
title genre year rating votes rating_group
2 Avengers Action 2012 8.1 1800000 Good
5 Coco Animation 2017 8.4 500000 Good
6 Joker Drama 2019 8.5 1000000 Excellent
7 Frozen Animation 2013 7.5 700000 Average
9 Tenet Sci-Fi 2020 7.4 400000 Below Average
# 13. Group by genre and average rating
df.groupby("genre")["rating"].mean()
genre
Action       8.10
Animation    8.08
Drama        8.50
Romance      7.80
Sci-Fi       8.10
Name: rating, dtype: float64
# 14. Visualization: Rating Distribution
# Histogram of the rating column, drawn through an explicit Axes handle
# rather than the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df["rating"], bins=5, color="skyblue", edgecolor="black")
ax.set_title("Rating Distribution")
ax.set_xlabel("Rating")
ax.set_ylabel("Number of Movies")
ax.grid(True)
plt.show()

png

# 15. Bar chart: Votes per movie
# One bar per movie title, bar height is the vote count.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(df["title"], df["votes"], color="orange")
# Rotate the title labels on the x axis by 45 degrees.
ax.tick_params(axis="x", labelrotation=45)
ax.set_title("Votes per Movie")
ax.set_ylabel("Number of Votes")
fig.tight_layout()
plt.show()

png

# 16. Scatter plot: Ratings vs Votes
# Scatter of rating (x) against vote count (y), one point per movie.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(df["rating"], df["votes"], color="green")
ax.set(title="Ratings vs Votes", xlabel="Rating", ylabel="Votes")
ax.grid(True)
plt.show()

png



Score: 15

Category: basics