forked from learn-co-curriculum/dsc-phase-1-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.py
56 lines (39 loc) · 1.87 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
def dropOnes(df, jobCategory):
    """Return the rows of one job category for people credited more than once.

    The frame is first restricted to rows whose ``category`` equals
    ``jobCategory``.  People (``primary_name``) are then counted by their
    number of non-null ``movie`` entries, and only rows belonging to people
    with a count greater than one are kept.

    Args:
        df: DataFrame with at least the columns ``category``,
            ``primary_name`` and ``movie``.
        jobCategory: Value of ``category`` to select.

    Returns:
        A DataFrame containing the matching rows of ``df``; empty when no
        person in the category has more than one counted movie.
    """
    df_pop = df[df['category'] == jobCategory]
    credit_counts = df_pop.groupby('primary_name')['movie'].count()

    # A boolean mask replaces the former "scan the sorted counts for the
    # first 1" loop.  That loop left its cut-off index unassigned (and
    # crashed) whenever nobody in the category had exactly one credit, and
    # it relied on positional integer lookups on a name-labelled Series.
    repeat_names = credit_counts[credit_counts > 1].index

    return df_pop.loc[df_pop['primary_name'].isin(repeat_names)]
def categoryStudy(df, jobCategory, maxList, figHeight, budget):
    """Rank people in a job category by median profit and plot the leaders.

    Rows are first filtered with ``dropOnes`` so that only people with more
    than one credit in ``jobCategory`` remain.  The median ``profit_gross``
    and median ``profit_rate`` are computed per ``primary_name``; the top
    ``maxList`` names of each ranking are drawn as two side-by-side
    horizontal bar charts and saved under ``figures/``.  The names present
    in both top lists are printed as the "best" list.

    Args:
        df: DataFrame with the columns ``category``, ``primary_name``,
            ``movie``, ``profit_gross`` and ``profit_rate``.
        jobCategory: Value of ``category`` to study; also used in the
            printed text, the chart labels and the output file name.
        maxList: Number of top-ranked names to keep in each chart.
        figHeight: Figure height passed to ``figsize`` (width is fixed at 20).
        budget: Label interpolated into the output file name.

    Returns:
        None.  The function prints a summary and writes
        ``figures/<jobCategory>-profit_<budget>Budget.png``.
    """
    import os  # local import: only needed to guarantee the output folder

    # Apply the seaborn theme before the figure exists.  Style settings are
    # picked up when axes are created, so setting the theme afterwards left
    # the first figure of a session styled differently from later ones.
    sns.set(font_scale=1.5)

    dFrame = dropOnes(df, jobCategory)
    print("Total number: {}".format(len(dFrame['primary_name'].unique())))

    # Group once and reuse it for both rankings.
    by_name = dFrame.groupby('primary_name')
    series1 = (
        by_name['profit_gross'].median().sort_values(ascending=False).iloc[:maxList]
    )
    series2 = (
        by_name['profit_rate'].median().sort_values(ascending=False).iloc[:maxList]
    )

    # Names that appear in both top lists, kept in gross-profit rank order.
    # (A bare set intersection printed them in an order that changed from
    # run to run.)
    top_rate_names = set(series2.index)
    best = [name for name in series1.index if name in top_rate_names]
    print('Best {} List: '.format(jobCategory), best)

    fig, axes = plt.subplots(1, 2, figsize=(20, figHeight))
    plt.subplots_adjust(wspace=0.6)

    # NOTE(review): both titles hard-code "Production Budget > $100,000,000"
    # while `budget` only affects the file name -- confirm whether the title
    # should follow `budget` as well.
    series1.plot.barh(ax=axes[0])
    axes[0].set_title('Median Profit per {} (Production Budget > $100,000,000)'.format(jobCategory))
    axes[0].set_xlabel('Median Profit')
    axes[0].set_ylabel('{} Name'.format(jobCategory))

    series2.plot.barh(ax=axes[1], color='green')
    axes[1].set_title('Median Profit Rate per {} (Production Budget > $100,000,000)'.format(jobCategory))
    axes[1].set_xlabel('Median Profit Rate')
    axes[1].set_ylabel('{} Name'.format(jobCategory))

    # savefig does not create missing directories.
    os.makedirs('figures', exist_ok=True)
    plt.savefig('figures/{}-profit_{}Budget.png'.format(jobCategory, budget))