# Download the on-exchange fund ranking list from fund.eastmoney.com and save
# it as a dated CSV file ("<YYYY-MM-DD>-fund.csv") in the working directory.
print('----------爬取所有场内基金名单开始----------')

# NOTE(review): "pi=1&pn=1000" presumably selects page 1 with up to 1000 rows
# per page -- confirm against the endpoint before relying on it.
url = 'http://fund.eastmoney.com/data/rankhandler.aspx?op=ph&dt=fb&ft=ct&rs=&gs=0&sc=clrq&st=asc&pi=1&pn=1000'
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/101.0.4951.67 Safari/537.36 ',
    'Host': 'fund.eastmoney.com',
    'Referer': 'http://fund.eastmoney.com/data/fbsfundranking.html',
}

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# page be parsed as fund data further down.
response = requests.get(url, headers=header, timeout=30)
response.raise_for_status()
response.encoding = 'utf-8'

# Remove the "var rankData = " assignment prefix and every semicolon from the
# returned JavaScript snippet.
response_body = response.text.replace("var rankData = ", "").replace(";", "")

# Splitting on double quotes leaves the quoted strings at the odd indices;
# each of those is one comma-separated fund record.
datas = response_body.split('"')
data_list = [record.split(',') for record in datas[1::2]]
if not data_list:
    # Without this guard the column drop below fails with an opaque KeyError.
    raise ValueError('no fund records found in the response from ' + url)

result = pd.DataFrame(data_list)
file_name = datetime.datetime.now().strftime("%Y-%m-%d") + '-fund.csv'

# Positional columns 16-20 are not exported; the 18 columns that remain are
# named below in their original order.
result.drop(columns=[16, 17, 18, 19, 20], inplace=True)
result.columns = [
    '基金代码', '基金全称', '基金英文缩写', '日期', '单位净值', '累计净值',
    '近一周', '近一月', '近三月', '近六月', '近一年', '近两年', '近三年',
    '今年来', '成立来', '成立日期', '类型', '近五年',
]

# Move the fund type ('类型') to position 3, directly after the three
# code/name columns.
fund_type = result.pop("类型")
result.insert(3, "类型", fund_type)
# Move the five-year return ('近五年') to position 14, directly after the
# three-year return ('近三年').
five_rate = result.pop("近五年")
result.insert(14, "近五年", five_rate)

result.to_csv(file_name, encoding='gbk', index=False)
print("总共爬取了" + str(len(data_list)) + "只场内基金数据")
print('----------爬取所有场内基金名单结束----------')
|