pandas.DataFrame documentation
第一个程序:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Sample data: seven labelled integers plus two boolean columns derived from them.
values = np.array([1, 3, 2, 4, 1, 6, 4])
example_df = pd.DataFrame({
    'value': values,
    'even': values % 2 == 0,
    'above_three': values > 3,
}, index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])

# groupby() without as_index: the grouping key 'even' becomes the index of the
# result instead of remaining a regular column.
first_even = example_df.groupby('even').first()
print(example_df)
print(first_even)

# Looking 'even' up as a column fails because it is no longer a column of the
# grouped DataFrame. The KeyError is caught and reported so the demonstration
# still shows the error without aborting anything that runs after it.
try:
    print(first_even['even'])
except KeyError as err:
    print('KeyError:', err)
- 输出结果:
# 第一个print
above_three even value
a False False 1
b False False 3
c False True 2
d True True 4
e False False 1
f True True 6
g True True 4
# 第二个print
above_three value
even
False False 1
True False 2
# 第三个print
KeyError: 'even'
第二个程序:
# Group on 'even' again, this time with as_index=False so the grouping key is
# kept as an ordinary column of the result.
grouped_by_even = example_df.groupby('even', as_index=False)
first_even = grouped_by_even.first()
print(first_even)
# 'even' is still a column here, so the lookup succeeds.
even_column = first_even['even']
print(even_column)
- 输出结果:
# 第一个print
even above_three value
0 False False 1
1 True False 2
# 第二个print
0 False
1 True
Name: even, dtype: bool
画出地铁流量分布图:
数据文件:nyc_subway_weather.csv
按 Ctrl+S 将该文件保存到本地
import matplotlib.pyplot as plt
import pandas as pd

# Load the subway/weather CSV from the local path below.
filename = 'E:/Deep Learning/Udacity/Data_Analysis/nyc_subway_weather.csv'
subway_df = pd.read_csv(filename)

# Average the columns for each (latitude, longitude) pair. as_index=False keeps
# the two grouping keys as regular columns so they can be plotted directly.
# numeric_only=True restricts the mean to numeric columns: pandas 2.x raises a
# TypeError on any non-numeric column instead of silently dropping it.
data_by_location = subway_df.groupby(
    ['latitude', 'longitude'], as_index=False
).mean(numeric_only=True)

# Bare expression: it only shows a value when run on its own in an interactive
# session; inside a script the result is discarded.
data_by_location.head()['latitude']

# Express hourly entries in units of their standard deviation so they can be
# used as marker sizes.
scaled_entries = (
    data_by_location['ENTRIESn_hourly']
    / data_by_location['ENTRIESn_hourly'].std()
)

# scaled_entries is doubled so the plotted markers come out a more suitable size.
plt.scatter(
    data_by_location['latitude'],
    data_by_location['longitude'],
    s=2 * scaled_entries,
)
plt.show()