用 Python 绘制一张炫酷的玫瑰图!
01
获取网页数据
启动webdriver, 打开网页
定位标题和下载图片
获取当前页数据,并实现翻页,获取所有数据
import csv
import time

import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Path to the chromedriver executable; replace it with your own.
CHROMEDRIVER_PATH = '/usr/local/chromedriver'
PAGE_URL = 'https://www.visualcapitalist.com/which-streaming-service-has-the-most-subscriptions/'
# CSS selector for the cells of one table column; format with the column number.
TABLE_CELL_SELECTOR = '#tablepress-1461 > tbody > tr > td.column-{}'
# Number of table pages to read (the table is paged through its "Next" button).
PAGE_COUNT = 3
# The headline becomes a file name, so replace characters that common file
# systems reject (for example "?" or "/").
UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def column_texts(driver, column):
    """Return the stripped text of every cell in table column *column* (1-based)."""
    cells = driver.find_elements(By.CSS_SELECTOR, TABLE_CELL_SELECTOR.format(column))
    return [cell.text.strip() for cell in cells]


# 01. Start the webdriver and open the page.
# Selenium 4 takes the driver path through a Service object; passing the path
# positionally to webdriver.Chrome() is rejected by current releases.
browser = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH))
try:
    browser.get(PAGE_URL)
    browser.implicitly_wait(5)  # element lookups wait up to 5 s for content

    # 02. Read the headline and download the lead image.
    headline = browser.find_element(By.CSS_SELECTOR, '#mvp-post-head > h1').text
    image_url = browser.find_element(
        By.CSS_SELECTOR, '#mvp-post-head > span > p:nth-child(1) > img'
    ).get_attribute('src')
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()  # do not save an error page as a .jpg
    image_path = './' + headline.translate(UNSAFE_FILENAME_CHARS) + '.jpg'
    with open(image_path, 'wb') as fp:
        fp.write(response.content)
    time.sleep(2)  # short pause kept from the original before reading the table

    # 03. Page through the table and collect every row.
    # The file is opened once in write mode so that re-running the script does
    # not append a second header row or duplicate data; newline='' is what the
    # csv module requires to avoid blank lines on Windows.
    with open('./data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['services', 'subscribers', 'types'])
        for page in range(1, PAGE_COUNT + 1):
            services = column_texts(browser, 1)
            types = column_texts(browser, 2)
            subscribers = column_texts(browser, 3)
            # Column order on the page is service / type / subscribers; the
            # CSV stores service / subscribers / type to match the header.
            writer.writerows(zip(services, subscribers, types))
            if page < PAGE_COUNT:
                # Move to the next page; nothing to click after the last one.
                browser.find_element(By.XPATH, '//*[@id="tablepress-1461_next"]').click()
    print('数据成功获取')
finally:
    # Always close the browser, even if scraping failed part-way.
    browser.quit()
02
绘制玫瑰图
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Pie

# 01. Load and tidy the data.
# The first line of data.csv is the header row, so read it with header=0.
data = pd.read_csv('data.csv', header=0)
# If the scraper was run more than once in append mode, the header text shows
# up again as data rows; drop those rows before converting anything.
data = data[data['services'] != 'services']
# Keep one copy of each repeated row (keep=False would discard every copy).
data = data.drop_duplicates().reset_index(drop=True)
# Strip the "M" suffix and round to whole numbers.
data['subscribers'] = data['subscribers'].astype(str).str.replace('M', '', regex=False)
data['subscribers'] = data['subscribers'].astype('float').round()

# Custom colours, one per service type. The hex codes can be picked from the
# original infographic with an image editor.
color_dic = {
    'Video': '#fbf3dc',
    'Audio': '#1b1718',
    'Video/Audio': '#833100',
    'News': '#f1a15c',
}
data['colors'] = data['types'].map(color_dic)

# Build the [name, value] pairs that Pie.add expects as data_pair.
data_pair = [
    list(pair)
    for pair in zip(data['services'].tolist(), data['subscribers'].tolist())
]

# 02. Draw the rose chart with pyecharts' Pie.
pie = Pie(init_opts=opts.InitOpts(bg_color='#d66f0a', width='650px', height='1000px'))
pie.add(
    series_name='services',
    data_pair=data_pair,
    radius=['12%', '200%'],   # inner and outer radius
    center=['35%', '70%'],    # position of the chart centre
    rosetype='area',          # key step: 'area' turns the pie into a rose chart
)
pie.set_global_opts(
    legend_opts=opts.LegendOpts(is_show=False),  # hide the legend
)
pie.set_series_opts(
    label_opts=opts.LabelOpts(
        position='inside',    # draw labels inside the petals
        rotate=45,
        font_size=14,
        formatter="{c}",      # label text is the value only
        color='#d66f0a',
    )
)
pie.set_colors(data['colors'].tolist())  # one colour per data item
pie.render('streaming war.html')         # write the chart to an HTML file
03
总结一下
获取数据,难点在于使用 Selenium 定位全部数据,需要用 for 循环配合点击 Next 按钮完成翻页。
绘制数据,关键在于数据格式的整理,以及将饼图的 rosetype 设为 area。
欢迎关注本号,可私信获取源码和数据~
END
