100字范文,内容丰富有趣,生活中的好帮手!
100字范文 > 利用xpath爬取斗鱼主播热度和房间标题

利用xpath爬取斗鱼主播热度和房间标题

时间:2023-02-13 19:38:03

相关推荐

利用xpath爬取斗鱼主播热度和房间标题

import os
import random
import time
from typing import List, Optional

import pandas as pd
import requests
from bs4 import BeautifulSoup
from lxml import etree

# Pool of request headers; one entry is picked at random for every request.
_USER_AGENT_HEADERS = [
    {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko)"
                      " Chrome/35.0.1916.153 Safari/537.36"
    },
    {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/0101 Firefox/30.0"},
    {"User-Agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)"},
]

# Pool of proxy mappings; one entry is picked at random for every request.
# NOTE(review): every mapping only has an "http" key, so (per the requests
# `proxies` convention) they are not consulted for https:// URLs. The
# addresses are hard-coded and may no longer be reachable -- confirm.
_HTTP_PROXIES = [
    {"http": "123.206.25.108:808"},
    {"http": "61.150.96.27:36880"},
    {"http": "1.198.73.42:9999"},
]

# All three queries share the same path down to the "DyListCover-info" div.
_INFO_XPATH = '//li/div/a/div[@class="DyListCover-content"]/div[@class="DyListCover-info"]'
_HOT_XPATH = _INFO_XPATH + '/span[@class="DyListCover-hot is-template"]/text()'
_USER_XPATH = _INFO_XPATH + '/h2[@class="DyListCover-user is-template"]/text()'
_ZONE_XPATH = _INFO_XPATH + '/span[@class="DyListCover-zone"]/text()'

_CSV_PATH = "douyu_data.csv"
_CSV_COLUMNS = ['分区', '主播', '实时热度']  # zone, streamer, real-time popularity


def get_html_text(url: str) -> Optional[str]:
    """Download *url* and return the decoded page text.

    A random User-Agent header and a random proxy mapping are used for the
    request, with a 30 second timeout.

    Returns:
        The response body as text, or ``None`` when the request fails
        (connection error, timeout, invalid URL, or a non-2xx status).
        The failure is printed so it is not silently lost.
    """
    headers = random.choice(_USER_AGENT_HEADERS)
    proxy = random.choice(_HTTP_PROXIES)
    try:
        response = requests.get(url, timeout=30, headers=headers, proxies=proxy)
        response.raise_for_status()
    except requests.RequestException as exc:
        # The original returned the exception object here, which callers then
        # tried to parse as HTML. Report it and signal failure with None.
        print(exc)
        return None
    # Let requests guess the charset from the body instead of trusting headers.
    response.encoding = response.apparent_encoding
    return response.text


def get_live_infolist(url: str, live_info_lists: List[list]) -> List[list]:
    """Scrape popularity, streamer and zone texts from the page at *url*.

    On success three lists are appended to *live_info_lists*, in this order:
    popularity texts, streamer names, zone names. When the page cannot be
    fetched or parsed, *live_info_lists* is returned unchanged.

    Returns:
        The same *live_info_lists* object that was passed in.
    """
    html = get_html_text(url)
    if not html:
        return live_info_lists

    dom = etree.HTML(html)
    if dom is None:
        # Nothing parseable came back; there is nothing to query.
        return live_info_lists

    current_hots = dom.xpath(_HOT_XPATH)
    live_users = dom.xpath(_USER_XPATH)
    live_zones = dom.xpath(_ZONE_XPATH)

    print(live_zones)
    print(live_users)
    print(current_hots)

    live_info_lists.extend([current_hots, live_users, live_zones])
    return live_info_lists


def save_live_info(live_info_lists: List[list]) -> None:
    """Print the scraped data as a table and append it to ``douyu_data.csv``.

    *live_info_lists* must hold, in order, the popularity, streamer and zone
    lists produced by ``get_live_infolist``. If fewer than three lists are
    present (e.g. the fetch failed) nothing is written.
    """
    if len(live_info_lists) < 3:
        print("no live info to save")
        return

    current_hots, live_users, live_zones = live_info_lists[:3]
    # Row order must follow _CSV_COLUMNS (zone, streamer, popularity); the
    # original zipped (popularity, streamer, zone) and mislabeled the columns.
    # zip() stops at the shortest list, so unequal lengths drop trailing items.
    rows = list(zip(live_zones, live_users, current_hots))
    frame = pd.DataFrame(data=rows, columns=_CSV_COLUMNS)
    print(frame)

    try:
        # The file is opened in append mode, so only emit the header row when
        # starting a new (or empty) file; otherwise it would repeat every run.
        write_header = not os.path.exists(_CSV_PATH) or os.path.getsize(_CSV_PATH) == 0
        frame.to_csv(_CSV_PATH, mode="a+", header=write_header, encoding="gb18030")
    except OSError as exc:
        print(exc)


def main() -> None:
    """Scrape one Douyu category page and store the result as CSV."""
    # The original literal was '/g_wzry' (no scheme/host), which requests
    # rejects. NOTE(review): host restored from the article's subject (Douyu)
    # -- confirm this is the intended page.
    url = 'https://www.douyu.com/g_wzry'
    live_info = []
    get_live_infolist(url, live_info)
    save_live_info(live_info)


if __name__ == "__main__":
    main()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。