在对科比·布莱恩特(Kobe Bryant)的数据做可视化之前,需要先爬取数据(代码见下)。
数据来源:http://www.stat-nba.com/ (NBA 数据库)
(本人是 Python 初学者)
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import time
import re
import csv
import random
def get_html(url_toget):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        url_toget: URL to fetch with an HTTP GET request.

    Returns:
        The response text, or an empty string when the request fails
        (connection error, timeout, invalid URL, ...).
    """
    # Request headers carrying a desktop Chrome User-Agent string.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235'
    }
    # Timeout in seconds, drawn at random from 80..99 inclusive.
    timeout = random.choice(range(80, 100))
    try:
        page = requests.get(url_toget, headers=header, timeout=timeout)
    except requests.RequestException as exc:
        # The previous bare ``except`` printed a message and then fell
        # through to ``page.text`` with ``page`` unbound, which raised
        # UnboundLocalError. Report the failure and return early instead.
        print('error', exc)
        return ''
    # Force UTF-8 decoding rather than relying on the detected charset.
    page.encoding = 'utf-8'
    return page.text
def get_data(i, page_text):
    """Parse one season's page and append its table rows to bryant_data.csv.

    Each ``<tr>`` of the first ``<tbody>`` inside ``<div id="page">`` becomes
    one CSV row: the season label ``"<i>-<i+1>"`` followed by the ``.string``
    of every ``<td>`` in that row.

    Args:
        i: Starting year of the season (an int; the label uses ``i`` and
            ``i + 1``).
        page_text: HTML source of the season page.

    Returns:
        None. Rows are appended to ``bryant_data.csv`` and a progress message
        is printed. If the expected table is absent, the season is skipped
        with a message and nothing is written.
    """
    soup = BeautifulSoup(page_text, 'lxml')

    # Each lookup may return None (e.g. empty or unexpected HTML); guard every
    # step so a missing element skips the season instead of raising
    # AttributeError on None.
    body = soup.body
    page = body.find("div", {"id": "page"}) if body is not None else None
    tbody = page.find('tbody') if page is not None else None
    if tbody is None:
        print('bryant%s-%s: data table not found, season skipped' % (i, i + 1))
        return

    saiji = '%s-%s' % (i, i + 1)
    # NOTE: ``td.string`` is None for a cell with more than one child node;
    # csv.writer emits None as an empty field.
    rows = [
        [saiji] + [td.string for td in tr.find_all('td')]
        for tr in tbody.find_all('tr')
    ]

    # Open the file once per season rather than once per table row.
    if rows:
        with open("bryant_data.csv", 'a', errors='ignore', newline='') as f:
            csv.writer(f).writerows(rows)
    print('bryant%s-%s赛季的数据已加载完毕......' % (i,i+1))
if __name__ == '__main__':
    # CSV header row (26 columns). The first two columns are unlabelled;
    # get_data writes the season label into the first one.
    tou = ['','','球员','赛季','结果','比赛','首发','时间','投篮','命中','出手',
           '三分','命中','出手','罚球命中率','罚球命中','罚球出手数',
           '总篮板','前场篮板','后场篮板','助攻','抢断','盖帽','失误','犯规','得分']
    # Open with 'w' so every run starts from a fresh file. Appending the
    # header left a second header row (and a duplicate copy of all data)
    # in the file whenever the script was run again.
    with open("bryant_data.csv", 'w', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(tou)
    # One query per season, for starting years 1996 through 2015.
    for j in range(1996, 2016):
        kobe_url = 'http://www.stat-nba.com/query.php?crtcol=season&&PageNum=200&' \
                   'order=0&QueryType=game&GameType=season&Player_id=195&Season0=%s&Season1=%s&Team_id=LAL' % (j, j + 1)
        w = get_html(kobe_url)
        get_data(j, w)
    print('全部数据已加载完毕')
将数据导出为 CSV 格式后,可以用 Excel 查看:
然后对数据进行可视化处理(代码如下):
from pyecharts import Bar
import csv
score = []       # value of the '得分' (points) column for every data row
time_saiji = []  # season label (first column) of every data row; x axis
# newline='' lets the csv module handle line endings itself, as the csv
# documentation recommends for files passed to csv.reader.
with open("bryant_data.csv", "r", newline='') as f:
    reader = csv.reader(f)
    # Skip the header row; the default keeps an empty file from raising.
    next(reader, None)
    for row in reader:
        # Ignore blank or truncated rows instead of failing with IndexError.
        if len(row) <= 25:
            continue
        try:
            # Plot numbers rather than the raw CSV strings.
            points = int(row[25])
        except ValueError:
            # Non-numeric score (e.g. a repeated header row): skip the row.
            continue
        score.append(points)
        time_saiji.append(row[0])
bar = Bar('Kobe Bryant data','(score)')
bar.add('score',time_saiji,score,mark_line=['average'],mark_point=["max", "min"])
bar.render()
结果显示: