Python Data Analysis Visualization Project - Game Sales Volume

Data source

The data for this project comes from the Kaggle dataset at https://www.kaggle.com/datasets/gregorut/videogamesales (about 390 KB).

  • The data indicators include:
    • Rank - Sales Ranking
    • Name - Game name
    • Platform - Gaming Platform
    • Year - Year of release
    • Genre - Game genre
    • Publisher - Publisher
    • NA_Sales - Sales in North America (in millions)
    • EU_Sales - Sales in Europe (in millions)
    • JP_Sales - Sales in Japan (in millions)
    • Other_Sales - Sales in the rest of the world (in millions)
    • Global_Sales - Total worldwide sales (in millions)
Visualization

Data preparation

import pandas as pd
import matplotlib.pyplot as plt
# Load the video game sales table into a DataFrame.
# A forward slash is used in the path because "\v" inside an ordinary string
# literal is the vertical-tab escape, so 'data\vgsales.csv' would not name
# the intended file. Forward slashes are accepted on Windows as well.
data = pd.read_csv('data/vgsales.csv')
# Quick sanity check: show the column names and the first five rows.
print(data.columns)
print(data.head(5))

Game publication year and sales statistics - line chart

# Total the numeric sales columns per release year, newest year first.
# The aggregate gets its own name so that `data` keeps the raw per-game rows;
# the snippets further down still read the "Genre", "Platform" and
# "Publisher" columns from `data`.
# numeric_only=True restricts the sum to numeric columns so text columns
# (e.g. game names) are not aggregated.
yearly = data.groupby("Year").sum(numeric_only=True).sort_index(ascending=False)
print(yearly.head(5))

# One line per region on a single set of axes, with the year on the x axis.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(yearly.index, yearly["Global_Sales"], color="black", label="Global_Sales")
ax1.plot(yearly.index, yearly["NA_Sales"], color="blue", alpha=0.9, label="NA_Sales")  # slightly transparent
ax1.plot(yearly.index, yearly["EU_Sales"], color="yellow", label='EU_Sales')
ax1.plot(yearly.index, yearly["JP_Sales"], color="pink", label="JP_Sales")
ax1.set_xlabel("year of issuance")
ax1.set_ylabel("sales volume")
ax1.legend()
# Save before show(): once the window is closed the figure may be discarded.
# NOTE(review): assumes the 'image' directory already exists — confirm.
plt.savefig('image/mplot.png')
plt.show()

# Distinct genres in a stable (sorted) order, so each genre lands in the same
# subplot on every run; missing values are dropped because they cannot be sorted
# together with strings.
lst_genre = sorted(data["Genre"].dropna().unique())
print(lst_genre)

# Total numeric sales per (Year, Genre) pair. A separate name is used so that
# `data` keeps the raw per-game rows for the snippets that follow.
genre_year = data.groupby(["Year", "Genre"]).sum(numeric_only=True)
print(type(genre_year))
print(genre_year.head(5))
# Turn the (Year, Genre) index back into ordinary columns for filtering below.
genre_year = genre_year.reset_index()

# 3 x 4 grid: one subplot per genre.
fig, axrr = plt.subplots(3, 4)
ax1 = axrr[0][0]
print(type(ax1))
print(genre_year.head(50))

# axrr.flat walks the grid row by row, so `idx` equals row * 4 + column and
# every subplot receives a different genre.
for idx, ax in enumerate(axrr.flat):
    if idx >= len(lst_genre):
        # Fewer genres than grid cells: hide the unused axes.
        ax.set_visible(False)
        continue
    genre = lst_genre[idx]
    rows = genre_year[genre_year["Genre"] == genre]
    ax.plot(rows["Year"], rows["Global_Sales"], label=genre)
    ax.set_xlabel("Year")
    ax.set_ylabel("Global_Sales")
    ax.legend(loc='upper left')
fig.suptitle("the sales time curve of game types")

Sales statistics of game types, publishers, and gaming platforms - bar chart

# Build three "top 10 by global sales" tables: per platform, per publisher and
# per genre. numeric_only=True restricts each sum to the numeric columns so
# text columns are not aggregated.

# Top 10 platforms.
dataPla = data.groupby("Platform").sum(numeric_only=True)
dataPla = dataPla.sort_values("Global_Sales", ascending=False).head(10)

# Top 10 publishers, limited to those whose total Global_Sales exceeds 100.
dataPub = data.groupby("Publisher").sum(numeric_only=True)
dataPub = dataPub.sort_values("Global_Sales", ascending=False)
dataPub = dataPub[dataPub['Global_Sales'] > 100].head(10)

# Top 10 genres.
dataGen = data.groupby("Genre").sum(numeric_only=True)
dataGen = dataGen.sort_values("Global_Sales", ascending=False).head(10)

print(data.head(5))

# Font configuration for the figure text.
# NOTE(review): SimHei is presumably selected for CJK labels; confirm it is
# installed or still needed for this data.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# Three stacked bar charts: platforms, publishers, genres.
fig = plt.figure()
ax1 = fig.add_subplot(311)
ax2 = fig.add_subplot(312)
ax3 = fig.add_subplot(313)
ax1.bar(dataPla.index, dataPla['Global_Sales'])
ax2.bar(dataPub.index, dataPub['Global_Sales'])
ax3.bar(dataGen.index, dataGen['Global_Sales'])

Share of sales by game genre - pie chart

# Start a fresh figure: plt.pie() draws on the current axes, which would
# otherwise be the last bar-chart subplot when the snippets run in sequence.
plt.figure()
# One wedge per genre, labelled with the genre name and its percentage share
# of Global_Sales to three decimal places.
plt.pie(dataGen['Global_Sales'], labels=dataGen.index, autopct="%.3f%%")
# NOTE(review): assumes the 'image' directory already exists — confirm.
plt.savefig('image/mpie.png')

That is all for now - 2022.6.11.


Related articles