# Just write the code and get it done!!
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Configure the font up front so that Chinese text can be displayed in plots.
plt.rcParams['font.sans-serif'] = ['SimHei']
# SimHei has no glyph for the Unicode minus sign (U+2212); fall back to the
# ASCII hyphen so negative tick labels do not render as empty boxes.
plt.rcParams['axes.unicode_minus'] = False

# Location of the reviews dataset (absolute path on the author's machine).
DATA_PATH = 'D:/Womens Clothing E-Commerce Reviews.csv'
# No index_col (0/1/2/...) is passed here, so pandas adds a default index.
df = pd.read_csv(DATA_PATH)
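# df.info()  # show table info; useful for checking whether values are missing
# print(df.head())  # or .tail(); print() is needed to see the result; head() defaults to the first 5 rows
# print(df.describe())  # descriptive statistics for the columns
# df = df.dropna()  # drop rows with missing values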
# plt.hist(df['Age'],color='blue',label='Age')  # histogram
# plt.legend()  # controls whether the legend is displayed
# plt.xlabel('Age')
# plt.ylabel('Count')
# plt.title('Age distribution')
# plt.show()
# plt.figure(figsize=(10,8))  # the plot also renders without fixing the figure size
# sns.boxplot(x='Rating',y='Age',data=df)
# plt.show()
# print(df['Class Name'].unique())  # distinct values
# rd = df[df['Recommended IND'] == 1]
# nrd = df[df['Recommended IND'] == 0]
# print(rd.head())
# plt.xticks(rotation=45)
# plt.hist(rd['Department Name'],color='blue',alpha=0.5,label='rd')
# plt.hist(nrd['Department Name'],color='red',alpha=0.1,label='nrd')  # the smaller alpha is, the more transparent
# plt.legend()
# plt.title('你最骚')  # displays correctly only because the font was set earlier; without it the text is garbled
# plt.show()
# Add a new column holding the character count of each review.
# Missing reviews (NaN) are treated as empty text so they get length 0;
# casting NaN with astype(str) alone would produce the literal string 'nan'
# and report a bogus length of 3.
df['Review Length'] = df['Review Text'].fillna('').astype(str).str.len()
# print(df.head())

# sns.distplot is deprecated; histplot with kde=True and stat='density'
# draws the equivalent density-normalised histogram with a KDE curve
# (cut=3 matches the KDE tail extent distplot used).
# Binding the result to `ax` is optional here; the plot renders without it.
ax = sns.histplot(
    df['Review Length'],
    kde=True,
    stat='density',
    kde_kws={'cut': 3},
    color="blue",
)
plt.title("Length of Reviews")
plt.show()