这两天在学习pyecharts,刚好看到简书上有位大神写了篇关于星巴克门店分布的可视化,用的应该是0.5版本,尝试着用1.5的版本进行重写,原文章可参考:https://www.jianshu.com/p/27f42a623c12
首先在 Kaggle 上下载数据并导入。因为原数据里的国家是简称,还需要匹配成完整的国家名称,我直接找了个网站,读取其中的表格进行匹配:
import pandas as pd
import numpy as np
import pyecharts
# Load the Kaggle Starbucks store directory from a local CSV export.
discovery = pd.read_csv(
    r'C:\Users\Administrator\Desktop\数据集\directory.csv'
)
discovery.head()

# The dataset stores countries as short codes, so read a lookup table from a
# web page (the second table that read_html finds on it).
country_tables = pd.read_html('https://yumingsuoxie.51240.com/')
# Skip row 0 (presumably the table's own header row - confirm) and keep the
# columns labelled 0..2.
country = country_tables[1].loc[1:, 0:2]
country.columns = ['Country', 'Country_name_cn', 'Country_name']
country.head()
然后合并表格
# Left-join the country lookup onto the store table: every store row is kept
# and gains the Chinese / English country-name columns where the code matches.
data = pd.merge(discovery, country, on='Country', how='left')
data.head()
我们打算做全球分布的情况,以及中国的城市的分布情况。
在操作过程中发现台湾单列出来了,要改成中国,
同时发现星巴克在国外有其他子品牌,这里我们单看星巴克。
# Inspect the distinct Chinese country names to spot entries that need cleanup.
data.Country_name_cn.unique()

# Taiwan is listed separately in the lookup table; fold it into China.
# The result is assigned back rather than using replace(inplace=True) on a
# selected column: that chained in-place form emits a warning on recent
# pandas and does not modify `data` at all once copy-on-write is enabled.
data['Country_name_cn'] = data['Country_name_cn'].replace('中国台湾省', '中国')
data['Country_name'] = data['Country_name'].replace('Taiwan', 'China')

# The dataset also contains other brands; keep Starbucks-branded stores only.
new_data = data[data['Brand'] == 'Starbucks']
new_data.head()

# Count stores per country (English name) and order from most to fewest.
countries = new_data['Brand'].groupby(new_data['Country_name']).count().reset_index()
countries.sort_values('Brand', ascending=False, inplace=True)
# Display-only: the re-indexed frame is not stored.
countries.set_index('Country_name')
对国家维度进行统计排序,发现有70个国家有星巴克,直接看看地图分布
# Swap the United States' name for the shorter form (presumably the name the
# world map expects - confirm). Assign back instead of a chained
# replace(inplace=True), which may not update `countries` on recent pandas.
countries['Country_name'] = countries['Country_name'].replace(
    'United States of America', 'United States'
)
from pyecharts.charts import Map
from pyecharts import options as opts
def map_world() -> Map:
    """Build a world map shaded by the number of stores in each country.

    Reads the module-level ``countries`` frame (``Country_name`` and
    ``Brand`` columns).

    Returns:
        The configured ``Map`` chart.
    """
    pairs = [
        [name, count]
        for name, count in zip(countries.Country_name, countries.Brand)
    ]
    chart = Map()
    chart.add("国家或地区", pairs, "world")
    # Hide the per-country text labels.
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="星巴克全球分布"),
        visualmap_opts=opts.VisualMapOpts(max_=15000),
    )
    return chart


df = map_world()
df.render_notebook()
也有另一种写法:
# Same world map, built directly at module level without a wrapper function.
world_pairs = [
    [name, count]
    for name, count in zip(countries.Country_name, countries.Brand)
]
world_discovery = Map()
world_discovery.add("国家或地区", world_pairs, "world")
# Hide the per-country text labels.
world_discovery.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
world_discovery.set_global_opts(
    title_opts=opts.TitleOpts(title="星巴克全球分布"),
    visualmap_opts=opts.VisualMapOpts(max_=15000),
)
world_discovery.render_notebook()
然后我们再用条形图简单地看看门店数量排名前十的国家和城市分布
# Ten countries with the most Starbucks stores, keyed by Chinese country name.
stores_per_country = new_data.groupby('Country_name_cn')['Brand'].count()
country_list = stores_per_country.reset_index().nlargest(10, 'Brand')
# Display-only: the re-indexed frame is not stored.
country_list.set_index('Country_name_cn')
from pyecharts.charts import Bar
from pyecharts.globals import ThemeType
def country_bar() -> Bar:
    """Build a dark-themed bar chart of the ten countries with the most stores.

    Reads the module-level ``country_list`` frame (``Country_name_cn`` and
    ``Brand`` columns).

    Returns:
        The configured ``Bar`` chart.
    """
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    chart.add_xaxis(country_list.Country_name_cn.tolist())
    chart.add_yaxis(
        '星巴克',
        country_list.Brand.tolist(),
        category_gap='40%',
        itemstyle_opts=opts.ItemStyleOpts(color='forestgreen'),
    )
    chart.set_global_opts(
        # Rotate the x-axis labels by 30 degrees.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
        title_opts=opts.TitleOpts(title='星巴克国家分布'),
    )
    return chart


# Keep the chart under its own name: assigning it to `country_bar` would
# overwrite the function, so re-running this cell would fail with
# "TypeError: 'Bar' object is not callable".
country_bar_chart = country_bar()
country_bar_chart.render_notebook()
# Ten cities (worldwide) with the most Starbucks stores.
stores_per_city = new_data.groupby('City')['Brand'].count()
temp = stores_per_city.reset_index().nlargest(10, 'Brand')
# Display-only: the re-indexed frame is not stored.
temp.set_index('City')
def bar_base():
    """Build a dark-themed bar chart of the ten cities with the most stores.

    Reads the module-level ``temp`` frame (``City`` and ``Brand`` columns)
    and returns the configured ``Bar`` chart.
    """
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    chart.add_xaxis(temp.City.tolist())
    chart.add_yaxis(
        "星巴克",
        temp.Brand.tolist(),
        category_gap="40%",
        itemstyle_opts=opts.ItemStyleOpts(color='forestgreen'),
    )
    chart.set_global_opts(
        # Rotate the x-axis labels by 30 degrees.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
        title_opts=opts.TitleOpts(title="星巴克城市分布"),
    )
    return chart


city_bar = bar_base()
city_bar.render_notebook()
接着我们看看加盟情况,也就是各种门店所有权类型(Ownership Type)的占比
# Number of Starbucks stores under each ownership type.
stores_per_ownership = new_data.groupby('Ownership Type')['Brand'].count()
owner_style = stores_per_ownership.reset_index()
owner_style.columns = ['Ownership', 'number']
from pyecharts.charts import Pie
def owner_pie() -> Pie:
    """Build a rose-type ring chart of stores by ownership type.

    Reads the module-level ``owner_style`` frame (``Ownership`` and
    ``number`` columns).

    Returns:
        The configured ``Pie`` chart.
    """
    # Rich-text label placed outside each slice. The formatter refers to the
    # style names ("a", "abg", "hr", "b", "per") defined in `rich` below.
    label_opts = opts.LabelOpts(
        position="outside",
        formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c} {per|{d}%} ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich={
            "a": {"color": "#999", "lineHeight": 22, "align": "center"},
            "abg": {
                "backgroundColor": "#e3e3e3",
                "width": "100%",
                "align": "right",
                "height": 22,
                "borderRadius": [4, 4, 0, 0],
            },
            "hr": {
                "borderColor": "#aaa",
                "width": "100%",
                "borderWidth": 0.5,
                "height": 0,
            },
            "b": {"fontSize": 16, "lineHeight": 33},
            "per": {
                "color": "#eee",
                "backgroundColor": "#334455",
                "padding": [2, 4],
                "borderRadius": 2,
            },
        },
    )
    chart = Pie()
    chart.add(
        "星巴克所有权",
        [list(z) for z in zip(owner_style.Ownership, owner_style.number)],
        radius=["30%", "70%"],
        rosetype="radius",
        label_opts=label_opts,
    )
    chart.set_global_opts(title_opts=opts.TitleOpts(title="星巴克所有权"))
    return chart


# Keep the chart under its own name: assigning it to `owner_pie` would
# overwrite the function, so re-running this cell would fail with
# "TypeError: 'Pie' object is not callable".
owner_pie_chart = owner_pie()
owner_pie_chart.render_notebook()
再者,我们看看国内的分布,因为是国外的数据,城市名称会有问题,我们用经纬度去看
# Keep only the Starbucks stores whose country is China.
china_data = new_data[new_data['Country_name'] == 'China']

# One (city, longitude, latitude) row per city: the first store found in a
# city stands in for the whole city. drop_duplicates is chained without
# inplace=True so that it never mutates a slice of `china_data`, which is
# what triggered pandas' SettingWithCopyWarning.
cities = china_data[['City', 'Longitude', 'Latitude']].drop_duplicates('City')
# Coordinates are registered as add_coordinate(name, longitude, latitude),
# so turn every row into a plain list first.
cities_list = [list(row) for row in cities.values]

# Number of stores in each Chinese city.
china_city_list = china_data.groupby('City')['Brand'].count().reset_index()
china_city_list.columns = ['city', 'number']
from pyecharts.charts import Geo
from pyecharts.globals import ChartType, SymbolType
# City-level map drawn as an effect-scatter chart.
def china_city():
    """Build an effect-scatter Geo chart of store counts per Chinese city.

    Reads the module-level ``cities_list`` (rows of city, longitude,
    latitude) and ``china_city_list`` (``city`` and ``number`` columns).

    Returns:
        The configured ``Geo`` chart.
    """
    g = Geo()
    g.add_schema(maptype='china')
    # Register each city's coordinates under its name before adding the data.
    for city, longitude, latitude in cities_list:
        g.add_coordinate(city, longitude, latitude)
    g.add(
        '中国星巴克城市分布',
        [list(z) for z in zip(china_city_list.city, china_city_list.number)],
        type_=ChartType.EFFECT_SCATTER,
    )
    g.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    g.set_global_opts(
        visualmap_opts=opts.VisualMapOpts(is_piecewise=True),
        title_opts=opts.TitleOpts(title="中国星巴克分布情况"),
    )
    return g


# Keep the chart under its own name: assigning it to `china_city` would
# overwrite the function, so re-running this cell would fail with
# "TypeError: 'Geo' object is not callable".
city_scatter_chart = china_city()
city_scatter_chart.render_notebook()
# A heatmap also works and reads more intuitively, but it cannot show
# fine-grained detail.
def china_city():
    """Build a heatmap Geo chart of store counts per Chinese city.

    Reads the module-level ``cities_list`` (rows of city, longitude,
    latitude) and ``china_city_list`` (``city`` and ``number`` columns).

    Returns:
        The configured ``Geo`` chart.
    """
    g = Geo()
    g.add_schema(maptype='china')
    # Register each city's coordinates under its name before adding the data.
    for city, longitude, latitude in cities_list:
        g.add_coordinate(city, longitude, latitude)
    g.add(
        '中国星巴克城市分布',
        [list(z) for z in zip(china_city_list.city, china_city_list.number)],
        type_=ChartType.HEATMAP,
    )
    g.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    g.set_global_opts(
        visualmap_opts=opts.VisualMapOpts(is_piecewise=True),
        title_opts=opts.TitleOpts(title="中国星巴克分布情况"),
    )
    return g


# Keep the chart under its own name: assigning it to `china_city` would
# overwrite the function, so re-running this cell would fail with
# "TypeError: 'Geo' object is not callable".
city_heatmap_chart = china_city()
city_heatmap_chart.render_notebook()
当然也可以做店铺维度的分布
# Store-level view: pull out the three columns needed per store.
# NOTE(review): this rebinds `discovery`, which earlier held the raw store
# table read from the CSV file.
discovery = china_data[['Store Name', 'Longitude', 'Latitude']]
# Coordinates are registered as add_coordinate(name, longitude, latitude),
# so turn every row into a plain list first.
shop_discovery = [list(row) for row in discovery.values]

# Number of rows per store name.
stores_by_name = china_data.groupby('Store Name')['Brand'].count()
shop_list = stores_by_name.reset_index()
shop_list.columns = ['Store_Name', 'number']
def china_city():
    """Build an effect-scatter Geo chart with one point per store name.

    Reads the module-level ``shop_discovery`` (rows of store name,
    longitude, latitude) and ``shop_list`` (``Store_Name`` and ``number``
    columns).

    Returns:
        The configured ``Geo`` chart.
    """
    g = Geo()
    g.add_schema(maptype='china')
    # Register each store's coordinates under its name before adding the data.
    for store_name, longitude, latitude in shop_discovery:
        g.add_coordinate(store_name, longitude, latitude)
    g.add(
        '中国星巴克城市分布',
        [list(z) for z in zip(shop_list.Store_Name, shop_list.number)],
        type_=ChartType.EFFECT_SCATTER,
        symbol_size=4,
        is_large=True,
    )
    g.set_series_opts(label_opts=opts.LabelOpts(is_show=False), trail_length=1)
    g.set_global_opts(
        title_opts=opts.TitleOpts(title="中国星巴克店铺分布情况"),
    )
    return g


# Keep the chart under its own name: assigning it to `china_city` would
# overwrite the function, so re-running this cell would fail with
# "TypeError: 'Geo' object is not callable".
shop_scatter_chart = china_city()
shop_scatter_chart.render_notebook()