# Dependencies and Setup
import pandas as pd
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()
player_count = len(purchase_data["SN"].unique())
print(f"There are {player_count} players total in Pymoli!")
item_count = len(purchase_data["Item ID"].unique())
print(f"There are {item_count} purchasable in-game items, total!")
avg_price = purchase_data["Price"].mean()
print("The average price of the items sold in-game was ${:.2f}".format(avg_price))
avg_price_string = "${:.2f}".format(avg_price)
item_sold_count = purchase_data["Purchase ID"].count()
print("The total number of items sold in the Pymoli game was", item_sold_count)
total_revenue = purchase_data["Price"].sum()
print("The total revenue from in-game item purchases was ${:,}".format(total_revenue))
total_revenue_string = "${:,}".format(total_revenue)
#put info into a dataframe and show as a table!
item_summary_stats_df = pd.DataFrame({"Number of Unique Items":[item_count],
"Average Price":[avg_price_string],
"Number of Purchases":[item_sold_count],
"Total Revenue":[total_revenue_string]})
item_summary_stats_df
#remember the total player count from earlier - this will help with percentages!
print("There are {} total players of Pymoli.".format(player_count))
#now find number of male players
all_male_purchases = purchase_data.loc[purchase_data["Gender"] == "Male"]
male_player_count = len(all_male_purchases["SN"].unique())
print("Of the {} total players of Pymoli, there are {} male players.".format(player_count, male_player_count))
#now find number of female players
all_female_purchases = purchase_data.loc[purchase_data["Gender"] == "Female"]
female_player_count = len(all_female_purchases["SN"].unique())
print("Of the {} total players of Pymoli, there are {} female players.".format(player_count, female_player_count))
#now find number of other/non-disclosed players
other_players = player_count - (male_player_count + female_player_count)
print("Of the {} total players of Pymoli, there are {} other/non-disclosed players.".format(player_count, other_players))
#create a dataframe table to hold this player count by gender information, as well as show the percentages of the
#player population that are categorized by each gender
stats_dict = {"Total Count":{"Male": male_player_count,
"Female": female_player_count,
"Other/Non-Disclosed": other_players},
"Percentage of Players":{"Male": ("{:.2%}".format(male_player_count/player_count)),
"Female": "{:.2%}".format(female_player_count/player_count),
"Other/Non-Disclosed": "{:.2%}".format(other_players/player_count)},}
player_gender_df = pd.DataFrame(stats_dict)
#output table with desired counts and percentages by gender
player_gender_df
purchases_by_gender = purchase_data["Gender"].value_counts()
total_purchases = purchases_by_gender.sum()
#summary series to show the different purchase counts by gender
print(purchases_by_gender)
#lookup the purhcase counts by gender and store in variables
male_purchases = purchases_by_gender["Male"]
female_purchases = purchases_by_gender["Female"]
other_purchases = total_purchases - (male_purchases + female_purchases)
#calculate average purchase price by gender
#first, filter all the purchase records to only include male purchases
male_purchases_only = purchase_data.loc[purchase_data["Gender"] == "Male"]
#get the average purchase price for any given male purchase
male_avg_purchase_price = male_purchases_only["Price"].mean()
male_avg_purchase_price = round(male_avg_purchase_price, 2)
print("The average in-game item purchase price in the Pymoli game for male players was ${}".format(male_avg_purchase_price))
#get the total revenue from male purchases
male_total_purchase_value = male_purchases_only["Price"].sum()
male_total_purchase_value = round(male_total_purchase_value, 2)
print("The total in-game item purchase price in the Pymoli game for male players was ${:,}".format(male_total_purchase_value))
#get the average purchase value per male player (average amount spent by each player)
avg_male_purchase_per_player = male_total_purchase_value/male_player_count
avg_male_purchase_per_player = round(avg_male_purchase_per_player, 2)
print("The average in-game item purchase amount in the Pymoli game for male players was ${:.2f}".format(avg_male_purchase_per_player))
#now, filter the purchase records to only include female purchases
female_purchases_only = purchase_data.loc[purchase_data["Gender"] == "Female"]
#get the average purchase price for any given female purchase
female_avg_purchase_price = female_purchases_only["Price"].mean()
female_avg_purchase_price = round(female_avg_purchase_price, 2)
print("The average in-game item purchase price in the Pymoli game for female players was ${:.2f}".format(female_avg_purchase_price))
#get the total revenue from female purchases
female_total_purchase_value = female_purchases_only["Price"].sum()
female_total_purchase_value = round(female_total_purchase_value, 2)
print("The total in-game item purchase price in the Pymoli game for female players was ${:,}".format(female_total_purchase_value))
#get the average purchase value per female player (average amount spent by each player)
avg_female_purchase_per_player = female_total_purchase_value/female_player_count
avg_female_purchase_per_player = round(avg_female_purchase_per_player, 2)
print("The average in-game item purchase amount in the Pymoli game for female players was ${:.2f}".format(avg_female_purchase_per_player))
#now, filter the purchase records to only include other/non-disclosed purchases
other_purchases_only = purchase_data.loc[((purchase_data["Gender"] != "Female") & (purchase_data["Gender"] != "Male")) ]
# print(other_purchases_only)
#get the average purchase price for any given 'other/undisclosed' purchase
other_avg_purchase_price = other_purchases_only["Price"].mean()
other_avg_purchase_price = round(other_avg_purchase_price, 2)
print("The average in-game item purchase price in the Pymoli game for 'other/undisclosed' gender players was ${:.2f}".format(other_avg_purchase_price))
#get the total revenue from 'other/undisclosed' purchases
other_total_purchase_value = other_purchases_only["Price"].sum()
other_total_purchase_value = round(other_total_purchase_value, 2)
print("The total in-game item purchase price in the Pymoli game for 'other/undisclosed' gender players was ${:,}".format(other_total_purchase_value))
#get the average purchase value per 'other/undisclosed' gender player (average amount spent by each player)
avg_other_purchase_per_player = other_total_purchase_value/other_players
avg_other_purchase_per_player = round(avg_other_purchase_per_player, 2)
print("The average in-game item purchase amount in the Pymoli game for 'other/undisclosed' gender players was ${:.2f}".format(avg_other_purchase_per_player))
purchase_count_dict = {"Male":male_purchases, "Female":female_purchases, "Other / Non-Disclosed":other_purchases}
avg_purchase_price_dict = {"Male":male_avg_purchase_price, "Female":female_avg_purchase_price, "Other / Non-Disclosed":other_avg_purchase_price}
total_purchase_dict = {"Male":male_total_purchase_value, "Female":female_total_purchase_value, "Other / Non-Disclosed":other_total_purchase_value}
avg_purchase_per_player_dict = {"Male":avg_male_purchase_per_player, "Female":avg_female_purchase_per_player, "Other / Non-Disclosed":avg_other_purchase_per_player}
overall_table_dict = {"Purchase Count":purchase_count_dict, "Average Purchase Price":avg_purchase_price_dict,
"Total Purchase Value":total_purchase_dict, "Avg Total Purchase Per Person":avg_purchase_per_player_dict}
purchases_summary_stats_df = pd.DataFrame(overall_table_dict)
purchases_summary_stats_df
#get age information for unique players - the mean of the ages should be the same as the age for each
#player, since they have the same age for each purchase
unique_players = purchase_data.loc[:, ["SN", "Age"]].groupby("SN").mean()
print("Here is the series of unique player SN's and ages: \n", unique_players)
bins = [0, 9, 14, 19, 24, 29, 34, 39, 200]
bin_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
age_df = pd.cut(unique_players["Age"], bins, labels=bin_labels)
age_table_df = pd.DataFrame(age_df.value_counts())
age_table_df["Percentage of Players"] = (age_table_df["Age"]/player_count).map("{:.2%}".format)
age_table_df.index.name = "Age"
age_table_df.columns = ["Player Count", "Percentage of Players"]
age_table_df.reindex(bin_labels)
purchase_data_copy = purchase_data.copy()
bins = [0, 9, 14, 19, 24, 29, 34, 39, 200]
bin_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
#replaces the number age with the age range bin
purchase_data_copy["Age"] = pd.cut(purchase_data_copy["Age"], bins, labels=bin_labels)
#get purchase counts by age bin
age_summary_df = purchase_data_copy.groupby("Age").count()
#remove extra columns
age_summary_df = age_summary_df.drop([age_summary_df.columns[a] for a in range(1, len(age_summary_df.columns))], axis=1)
age_summary_df.columns = ["Purchase Count"]
#print(age_summary_df)
#get average purchase prices for each age bin
avg_purchase_price_df = purchase_data_copy.groupby("Age").mean()
avg_purchase_price_df = avg_purchase_price_df.loc[:, "Price"].map("${:.2f}".format)
#add to the summary stats table
age_summary_df = age_summary_df.merge(avg_purchase_price_df, on="Age")
update_cols = list(age_summary_df.columns)
update_cols[-1] = "Average Purchase Price"
age_summary_df.columns = update_cols
#get total purchase values for each age bin
total_purchase_price_df = purchase_data_copy.groupby("Age").sum()
total_purchase_price_df = total_purchase_price_df.loc[:, "Price"].map("${:.2f}".format)
#add to the summary stats table
age_summary_df = age_summary_df.merge(total_purchase_price_df, on="Age")
update_cols = list(age_summary_df.columns)
update_cols[-1] = "Total Purchase Price"
age_summary_df.columns = update_cols
#get avg total purchase per person for each age bin
avg_total_purchase_price_df = purchase_data_copy.groupby("Age").sum()#/(age_table_df["Player Count"])
avg_total_purchase_price_df = pd.DataFrame(avg_total_purchase_price_df.loc[:, "Price"]).merge(
age_table_df.loc[:, "Player Count"], on="Age")
avg_total_purchase_price_df["Avg Total Purchase Per Person"] = avg_total_purchase_price_df["Price"]/avg_total_purchase_price_df["Player Count"]
#print(avg_total_purchase_price_df)
avg_total_purchase_price_df = avg_total_purchase_price_df.loc[:, "Avg Total Purchase Per Person"].map("${:.2f}".format)
#print(avg_total_purchase_price_df) -> correct avg total per person calc, now add to summary stats table
age_summary_df = age_summary_df.merge(avg_total_purchase_price_df, on="Age")
update_cols = list(age_summary_df.columns)
update_cols[-1] = "Avg Total Purchase Per Person"
age_summary_df.columns = update_cols
print("The summary stats table for age-related purchasing analysis is below:")
age_summary_df
#group data by SN's
top_spenders_df = pd.DataFrame(purchase_data.groupby("SN").count())
#remove extra columns
top_spenders_df = top_spenders_df.drop([top_spenders_df.columns[a] for a in range(1, len(top_spenders_df.columns))], axis=1)
top_spenders_df.columns = ["Purchase Count"]
#get the average purchase price in a DataFrame
avg_spent_df = pd.DataFrame(purchase_data.groupby("SN").mean())
avg_spent_df = avg_spent_df.loc[:, "Price"].map("${:.2f}".format)
#add into top_spenders_df
top_spenders_df = top_spenders_df.merge(avg_spent_df, on="SN")
update_cols = list(top_spenders_df.columns)
update_cols[-1] = "Average Purchase Price"
top_spenders_df.columns = update_cols
#get the total purchase price in a DataFrame
total_spent_df = pd.DataFrame(purchase_data.groupby("SN").sum())
#remove other columns, just leaving the price totals
total_spent_df = total_spent_df.loc[:, "Price"]
#add into top_spenders_df
top_spenders_df = top_spenders_df.merge(total_spent_df, on="SN")
update_cols = list(top_spenders_df.columns)
update_cols[-1] = "Total Purchase Price"
top_spenders_df.columns = update_cols
#sort values based on "Total Purchase Price"
top_spenders_df = top_spenders_df.sort_values(by="Total Purchase Price", ascending=False)
#can now update formatting to show dollar sign, otherwise the sorting is wrong because the
#"Total Purchase Price" column is sorting on strings, not numeric values!
top_spenders_df["Total Purchase Price"] = top_spenders_df["Total Purchase Price"].map("${:.2f}".format)
top_spenders_df.head(5)
#first, group items by both ID and Name
pop_items = purchase_data[["Item ID", "Item Name", "Price"]]
group_by_item = pop_items.groupby(["Item ID", "Item Name"])
#use the average price as the item price
pop_items = pd.DataFrame(group_by_item.mean())
pop_items.columns = ["Item Price (Average)"]
#get count of items purchased by item name and ID
item_purchase_count = group_by_item.count()
pop_items["Purchase Count"] = item_purchase_count
#get count of items purchased by item name and ID
item_purchase_total = group_by_item.sum()
pop_items["Purchase Amount"] = item_purchase_total
#sort the summary dataframe by item purchase count
pop_items = pop_items.sort_values(by="Purchase Count", ascending=False)
#format pop_items df so table looks better
formatted_pop_items = pop_items.copy()
formatted_pop_items["Item Price (Average)"] = pop_items["Item Price (Average)"].map("${:.2f}".format)
formatted_pop_items["Purchase Amount"] = pop_items["Purchase Amount"].map("${:.2f}".format)
formatted_pop_items.head()
#use original items table to sort by numeric values instead of strings
money_items = pop_items.copy()
money_items = money_items.sort_values(by="Purchase Amount", ascending=False)
#format table to look cleaner
formatted_money_items = money_items.copy()
formatted_money_items["Item Price (Average)"] = money_items["Item Price (Average)"].map("${:.2f}".format)
formatted_money_items["Purchase Amount"] = money_items["Purchase Amount"].map("${:.2f}".format)
formatted_money_items.head()
These trends in the purchasing data suggest that most of the game marketing and features should attract and support young male players (between the ages of 15 and 34) to grow the player base and drive revenue growth, since that demographic spends the most money in Heroes of Pymoli and has the largest market segment of the current player base. There are only 576 players of Heroes of Pymoli, so promoting the game to this demographic could increase the number of players, with an additional 1.35 items purchased on average per male player, and a \$4.07 average increase in revenue per additional male player. So if the marketing was able to result in attracting 1000 additional male players to Heroes of Pymoli, the revenue from in-game purchases would be expected to increase by about \\$4070.