一、高速公路公司名单
http://summary.jrj.com.cn/hybk/400128959.shtml
http://data.eastmoney.com/bkzj/421.html
二、下载数据
沪市:http://www.sse.com.cn/disclosure/listedinfo/regular/
深市:
http://www.szse.cn/disclosure/listed/fixed/index.html
三、源码下载
#coding:utf-8
"""Download bulletin PDFs for a list of stock codes from Sina Finance.

For every stock code the script fetches the Sina bulletin index page
(``page_type/ndbg`` -- presumably the annual-report listing, TODO confirm),
extracts bulletin ids and link titles with regular expressions, opens each
bulletin detail page and saves the first PDF link found there into a
per-stock folder under ``OUTPUT_ROOT``.
"""
import http.client
import os
import random
import re
import urllib.request

# Index page listing the bulletins of one stock; ``%s`` is the stock code.
BULLETIN_LIST_URL = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vCB_Bulletin/stockid/%s/page_type/ndbg.phtml'
# Detail page of one bulletin; ``%s%s`` is the stock code followed by the
# ``&id=...`` fragment returned by ``targrt_f``.
BULLETIN_DETAIL_URL = 'http://vip.stock.finance.sina.com.cn/corp/view/vCB_AllBulletinDetail.php?stockid=%s%s'

# NOTE: only ids of exactly seven digits are recognised; a longer id would be
# truncated to its first seven digits and a shorter one is ignored.
TARGET_RE = re.compile(r'&id=[0-9]{7}')
TITLE_RE = re.compile(r">(.*?)</a><br>")
# Dots are escaped so they only match literal dots. The original pattern only
# accepted the CNSESZ_STOCK directory; CNSESH_STOCK is accepted as well
# (presumably used for Shanghai listings such as the 60xxxx codes below --
# verify against a live detail page).
PDF_LINK_RE = re.compile(
    r"http://file\.finance\.sina\.com\.cn/211\.154\.219\.97:9494/MRGG/"
    r"CNSES[HZ]_STOCK/.*?\.PDF"
)
# Characters that are not allowed in Windows file names.
ILLEGAL_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|]')

# Root folder that receives one sub-folder per stock code.
OUTPUT_ROOT = "C:\\Users\\Administrator\\Desktop\\e"

codes = ['000885', '600035', '600012', '600033', '000900', '600003',
         '200429', '000916', '601188', '000548', '600350']
# Second batch of codes; not processed unless passed to ``main`` explicitly.
code2 = ['601518', '000755', '600106', '601107', '000828', '001965',
         '600269', '000429', '600368', '600548', '600020', '600377']


def _fetch(url):
    """Return the body of *url* decoded as GBK, closing the connection."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode('gbk')


def _safe_filename(name):
    """Return *name* with characters illegal in Windows file names replaced."""
    return ILLEGAL_FILENAME_CHARS_RE.sub('_', name)


def html_f(code):
    """Fetch, print and return the bulletin index page of stock *code*.

    Raises whatever ``urllib.request.urlopen`` raises on network failure and
    ``UnicodeDecodeError`` if the page is not valid GBK.
    """
    html = _fetch(BULLETIN_LIST_URL % code)
    print(html)
    return html


def targrt_f(html):
    """Print and return every ``&id=NNNNNNN`` fragment found in *html*."""
    target_l = TARGET_RE.findall(html)
    print(target_l)
    return target_l


def title_f(html):
    """Print and return the text of every ``>...</a><br>`` link in *html*."""
    item = TITLE_RE.findall(html)
    print(item)
    return item


def download(target, path, name, code='000048'):
    """Save the PDF attached to one bulletin as ``<path>/<name>.pdf``.

    Args:
        target: ``&id=...`` fragment identifying the bulletin.
        path: Existing directory that receives the file.
        name: Bulletin title; used as the file name after replacing
            characters that Windows does not allow in file names.
        code: Stock code placed in the detail-page URL. Defaults to the value
            the original script hard-coded, so existing three-argument calls
            behave as before.

    A bulletin without a PDF link, or a failed download, is reported by
    printing ``pass`` and then skipped. Failure to fetch the detail page
    itself is not caught.
    """
    url_html = _fetch(BULLETIN_DETAIL_URL % (code, target))
    target_pdf = PDF_LINK_RE.findall(url_html)
    if not target_pdf:
        print("pass")
        return
    print(target_pdf[0])
    destination = os.path.join(path, '%s.pdf' % _safe_filename(name))
    try:
        urllib.request.urlretrieve(target_pdf[0], destination)
    except (OSError, ValueError, http.client.HTTPException):
        # Best effort: one broken bulletin must not stop the whole batch.
        print("pass")


def main(stock_codes=None):
    """Download all listed bulletins for *stock_codes* (default: ``codes``)."""
    if stock_codes is None:
        stock_codes = codes
    for code in stock_codes:
        mkpath = os.path.join(OUTPUT_ROOT, code)
        # exist_ok lets the script be re-run without crashing on old folders.
        os.makedirs(mkpath, exist_ok=True)
        html = html_f(code)
        target_list = targrt_f(html)
        name = title_f(html)
        # zip() stops at the shorter list, so surplus titles are ignored.
        # NOTE(review): ids and titles are extracted independently; if the
        # page contains other ``</a><br>`` links the pairing may be shifted.
        for each, title in zip(target_list, name):
            print(each, title)
            download(each, mkpath, title, code)


if __name__ == '__main__':
    main()
四、生成股票列表
import sys

# Build the stock list: keep the text before the first comma of every line
# in stock.txt (the whole line when it contains no comma).
with open('stock.txt','r') as stock_file:
    result = [row.strip('\n').partition(',')[0] for row in stock_file]
print(result)