# Import the visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's theme with the font scale left at 1 (no enlargement).
sns.set(font_scale = 1)
# Disable axes grid lines by default.
# NOTE(review): presumably placed after sns.set so the seaborn theme's grid
# setting is overridden -- confirm before reordering these lines.
plt.rcParams["axes.grid"] = False
# Switch matplotlib to its built-in 'dark_background' style sheet.
plt.style.use('dark_background')
# IPython magic (notebook-only syntax): render figures inline in cell output.
%matplotlib inline

# Plot the distribution of the 'Rating Count' column as an 80-bin histogram
# and save it to 'ratingcounthist.jpg'.
plt.figure(figsize=(12,4))
plt.hist(movies_rating_count_avg['Rating Count'],bins=80,color='tab:purple')
# A histogram puts the plotted variable on the x-axis (the y-axis holds the
# per-bin frequency), so the variable's label belongs on the x-axis.
plt.xlabel('Ratings Count(Scaled)', fontsize=16)
plt.savefig('ratingcounthist.jpg')

# Plot the distribution of the 'Average Rating' column as an 80-bin histogram
# and save it to 'avgratinghist.jpg'.
plt.figure(figsize=(12,4))
plt.hist(movies_rating_count_avg['Average Rating'],bins=80,color='tab:purple')
# The average-rating values lie along the x-axis; the y-axis is the per-bin
# frequency, so label the x-axis rather than the y-axis.
plt.xlabel('Average Rating',fontsize=16)
plt.savefig('avgratinghist.jpg')
# usecols restricts the read to the chosen columns; dtype pins each
# column's type at load time.
movies_df = pd.read_csv(
    'movies.csv',
    dtype=dict(movieId='int32', title='str'),
    usecols=['movieId', 'title'],
)
movies_df.head()
# Read the ratings table, keeping four columns; 'timestamp' is given no
# explicit dtype, so pandas infers it.
ratings_df = pd.read_csv(
    'ratings.csv',
    dtype=dict(userId='int32', movieId='int32', rating='float32'),
    usecols=['userId', 'movieId', 'rating', 'timestamp'],
)
ratings_df.head()
# Check for missing values (per-column null counts of the movies table)
movies_df.isna().sum()
# Print the shape of each loaded table.
for _label, _frame in (("Movies:", movies_df), ("Ratings:", ratings_df)):
    print(_label, _frame.shape)
# Joint plot of 'Average Rating' (x) against 'Rating Count' (y), drawn
# semi-transparent in pink, then written to 'joinplot.jpg'.
plot = sns.jointplot(
    data=movies_rating_count_avg,
    x='Average Rating',
    y='Rating Count',
    color='tab:pink',
    alpha=0.5,
)
plot.savefig('joinplot.jpg')
