这个脚本省去了手工导出数据的麻烦,而且爱站还要求 VIP 会员才能导出。
自己能用就行,代码写得很渣。
# Scrape the keyword listing pages that baidurank.aizhan.com serves for
# huangye88.com (pages 1 through 50) and append the extracted keyword text
# to a.txt in the current working directory.

import os
import time
import urllib.request

from lxml import etree

# Value kept exactly as the original author wrote it.
# NOTE(review): the trailing "/" probably prevents urllib's NO_PROXY host
# suffix match against "baidurank.aizhan.com", so a configured proxy may
# still be used — confirm and drop the slash if direct access is intended.
os.environ['NO_PROXY'] = 'aizhan.com/'

BASE_URL = "https://baidurank.aizhan.com/baidu/huangye88.com/product/0/"
URL_SUFFIX = "/exp/-1/"
FIRST_PAGE = 1
LAST_PAGE = 50
OUTPUT_PATH = "a.txt"
REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection hangs forever
PAGE_DELAY = 10  # seconds to pause after each page
# Text nodes of <a class="gray"> elements whose parent has class "title".
KEYWORD_XPATH = '//a[@class="gray" and parent::*[@class="title" ]]/text()'

for page in range(FIRST_PAGE, LAST_PAGE + 1):
    url = BASE_URL + str(page) + URL_SUFFIX
    request = urllib.request.Request(url)

    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        raw = response.read()

    # Undecodable bytes are ignored. The copyright sign is still stripped so
    # the written text matches what the script always produced (presumably it
    # was removed because the platform default codec could not encode it).
    html = raw.decode('utf-8', 'ignore').replace(u'\xa9', u'')

    words = etree.HTML(html).xpath(KEYWORD_XPATH)
    keywords = ''.join(words).replace('\t', '')

    # Explicit UTF-8 avoids UnicodeEncodeError when the platform default
    # encoding (e.g. GBK on Windows) cannot represent a scraped character.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as f:
        f.write(keywords)

    time.sleep(PAGE_DELAY)
    print("已经完成第" + str(page) + "页内容的抓取......")