60963

python爬虫练习——爬取链家二手房信息(补充篇)

# -*- coding:utf-8 -*-
# Scrape Lianjia (xa.lianjia.com) second-hand housing listings for each Xi'an
# district, compute the floor-averaged unit price per district, and write one
# row [date, avg_1, ..., avg_n] into an existing .xls workbook.
import json
import time

import requests
import xlrd
import xlwt
from lxml import etree
from xlutils.copy import copy

# Listing index for one district is BASE_URL + <district>/, and page N of it
# is BASE_URL + <district>/pgN/  (e.g. https://xa.lianjia.com/ershoufang/beilin/pg2/).
BASE_URL = "https://xa.lianjia.com/ershoufang/"

# Seconds before a stalled HTTP request is abandoned; without a timeout a
# single hung connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Workbook that receives the data. It must already exist (see the note next
# to `title` below).
WORKBOOK_PATH = u"学生信息表.xls"

# 0-based sheet row that receives this run's data.
# TODO (from the original author): persist the row in a config file so the
# next run automatically continues on the following row.
TARGET_ROW = 2

counties = ["beilin", "weiyang", "baqiao", "xinchengqu", "changan4", "lianhu",
            "yanta", "gaoling1", "jingkai1", "xixian1", "qujiang1", "gaoxin5"]
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
}


def _fetch_tree(url):
    """Download *url* and return its parsed lxml HTML tree."""
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf8'
    return etree.HTML(response.text)


def _total_pages(root):
    """Return the number of listing pages announced by the index page.

    The pager <div> carries a ``page-data`` attribute whose value is parsed
    as JSON and must contain a ``totalPage`` entry. It is remote, untrusted
    text, so it is parsed with ``json.loads`` rather than ``eval``. When the
    attribute is absent, a single page is assumed so that whatever listings
    are present are still read.
    """
    page_data = root.xpath("//div/@page-data")
    if not page_data:
        return 1
    return int(json.loads(page_data[0])["totalPage"])


def _average_unit_price(county):
    """Return the floor-averaged unit price for *county*.

    Returns None when no priced listing was found, so the caller can leave
    the cell empty instead of failing with ZeroDivisionError.
    """
    county_url = BASE_URL + county + "/"
    page_count = _total_pages(_fetch_tree(county_url))

    total_money = 0
    total_number = 0
    for page in range(1, page_count + 1):
        root = _fetch_tree(county_url + "pg" + str(page) + "/")
        # The unit price is read from the numeric attribute, e.g.
        # data-price="51891", rather than from the visible
        # "//div[@class='unitPrice']/span/text()" label, which carries
        # extra unit text around the number.
        node_list = root.xpath("//div/@data-price")
        total_number += len(node_list)
        for node in node_list:
            total_money += float(node)

    if not total_number:
        return None
    return total_money // total_number


date_time = time.strftime('%Y-%m-%d')
data_average = [date_time]

for county in counties:
    average = _average_unit_price(county)
    data_average.append("" if average is None else str(average))
print(data_average)

# Header row of the workbook; the columns match `counties` in order, after
# the leading date column. The workbook itself is expected to have been
# created once beforehand, e.g. with
# xlwt.Workbook(encoding="utf-8").add_sheet(...), writing `title` into row 0
# and saving it under WORKBOOK_PATH.
title = ["日期","碑林","未央","灞桥","新城区","长安", "莲湖","雁塔","高陵","经开","西咸","曲江","高新"]

# formatting_info=True keeps the workbook's original cell formatting.
read_file = xlrd.open_workbook(WORKBOOK_PATH, formatting_info=True)
# xlrd workbooks are read-only: copy into a writable in-memory workbook.
write_data = copy(read_file)
# First sheet of the writable copy.
write_save = write_data.get_sheet(0)
for column, value in enumerate(data_average):
    write_save.write(TARGET_ROW, column, value)
# Save back over the original file.
write_data.save(WORKBOOK_PATH)

来源:博客园

作者:南山散人

链接:https://www.cnblogs.com/python-kp/p/11425884.html

Recommend