用 Python 绘制一张炫酷的玫瑰图!
01
获取网页数据
启动webdriver, 打开网页
定位标题和下载图片
获取当前页数据,并实现翻页,获取所有数据
from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
import csv
import time
# 01. Launch the WebDriver and open the article page
CHROMEDRIVER_PATH = '/usr/local/chromedriver'  # replace with the path to your own chromedriver executable
ARTICLE_URL = 'https://www.visualcapitalist.com/which-streaming-service-has-the-most-subscriptions/'

# NOTE(review): recent Selenium 4 releases may expect the driver path to be
# supplied through a Service object rather than positionally — confirm against
# the installed Selenium version.
browser = webdriver.Chrome(CHROMEDRIVER_PATH)
browser.get(ARTICLE_URL)
# Let later element lookups keep retrying for up to 5 seconds before failing.
browser.implicitly_wait(5)
# 02. Read the article title and download the headline image
title = browser.find_element(By.CSS_SELECTOR, '#mvp-post-head > h1').text + '.jpg'
pic = browser.find_element(
    By.CSS_SELECTOR, '#mvp-post-head > span > p:nth-child(1) > img'
).get_attribute('src')

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() prevents an HTTP error body from being saved as the image.
response = requests.get(pic, timeout=30)
response.raise_for_status()
res = response.content

# The title is used as the file name, so path separators are replaced;
# otherwise open() would look for a sub-directory that does not exist.
imagepath = './' + title.replace('/', '_').replace('\\', '_')
with open(imagepath, 'wb') as fp:
    fp.write(res)
time.sleep(2)  # pause two seconds before continuing
# 03. Walk through the table pages and save every row to data.csv
PAGE_COUNT = 3  # number of table pages to visit

try:
    # Open the file once in write mode so that re-running the script does not
    # append a second header and duplicate rows; newline='' is what the csv
    # module requires to avoid blank lines between rows on some platforms.
    with open('./data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['services', 'subscribers', 'types'])

        for page in range(1, PAGE_COUNT + 1):
            # In the page's table, column 2 holds the type and column 3 the
            # subscriber count; rows are written in the header's order instead.
            services = browser.find_elements(By.CSS_SELECTOR, '#tablepress-1461 > tbody > tr > td.column-1')
            types = browser.find_elements(By.CSS_SELECTOR, '#tablepress-1461 > tbody > tr > td.column-2')
            subscribers = browser.find_elements(By.CSS_SELECTOR, '#tablepress-1461 > tbody > tr > td.column-3')

            writer.writerows(
                [service.text.strip(), subscriber.text.strip(), kind.text.strip()]
                for service, subscriber, kind in zip(services, subscribers, types)
            )

            # Move to the next page; nothing is left to load after the last one.
            if page < PAGE_COUNT:
                next_button = browser.find_element(By.XPATH, '//*[@id="tablepress-1461_next"]')
                next_button.click()

    print('数据成功获取')
finally:
    # Close the browser even when scraping fails part-way through.
    browser.quit()
02
绘制玫瑰图
import pandas as pd
from pyecharts.charts import Pie
from pyecharts import options as opts
# 01. Load and tidy the scraped data
# The column names sit on the first line of data.csv, so header=0 is used;
# header=1 would promote the first data row to column names instead.
data = pd.read_csv('data.csv', header=0)
# data.csv may contain repeated header lines and repeated rows if it was
# appended to more than once: drop the stray header lines, then keep one copy
# of each row (keep=False would discard every copy of a repeated row).
data = data[data['subscribers'] != 'subscribers']
data = data.drop_duplicates()
data = data.reset_index(drop=True)
# Strip the trailing "M" from the counts and convert them to rounded floats.
data['subscribers'] = data['subscribers'].str.replace('M', '', regex=False)
data['subscribers'] = data['subscribers'].astype('float').round()
x_data = data['services']
y_data = data['subscribers']
# Custom palette: one colour per service type (hex codes can be sampled from
# the original infographic in an image editor).
color_dic = {
    'Video': '#fbf3dc',
    'Audio': '#1b1718',
    'Video/Audio': '#833100',
    'News': '#f1a15c',
}
data['colors'] = data['types'].map(color_dic)
# Build the [name, value] pairs expected by the chart; tolist() yields native
# Python values rather than NumPy scalars.
df = [[name, count] for name, count in zip(x_data.tolist(), y_data.tolist())]
# 02. Draw the rose chart with pyecharts' Pie
canvas_opts = opts.InitOpts(bg_color='#d66f0a', width='650px', height='1000px')
value_label_opts = opts.LabelOpts(
    position='inside',  # draw each label inside its slice
    rotate=45,
    font_size=14,
    formatter="{c}",  # label format
    color='#d66f0a',
)

pie = (
    Pie(init_opts=canvas_opts)
    .add(
        series_name='services',
        data_pair=df,
        radius=['12%', '200%'],  # inner and outer radius
        center=['35%', '70%'],  # position of the circle's centre
        rosetype='area',  # the key step: 'area' renders the pie as a rose chart
    )
    .set_global_opts(legend_opts=opts.LegendOpts(is_show=False))  # hide the legend
    .set_series_opts(label_opts=value_label_opts)
    .set_colors(list(data['colors']))  # one colour per slice
)
pie.render('streaming war.html')  # write the chart to an HTML file
03
总结一下
获取数据,难点在于使用 Selenium 定位全部数据,需要用 for 循环配合点击 Next 按钮完成翻页。
绘制数据,关键在于数据格式的整理,以及将饼图的 rosetype 设为 area。
欢迎关注本号,可私信获取源码和数据~
END