1. 发送请求
2. 获取源码
3. 提取数据(re正则表达式, css(parsel), XPath)
4. 保存数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import requests
import parsel
import csv
import time

def _extract_listing(page):
    """Pull the ten listing fields out of a parsed detail page.

    Args:
        page: ``parsel.Selector`` built from a listing detail page.

    Returns:
        A list ``[title, price, price_per_meter, house_type, area, direction,
        furnished, lift, floors, year]``. Each entry is the first text match
        of its CSS selector, or ``None`` when the selector matches nothing.
    """
    title = page.css('.title h1::text').get()
    price = page.css('span.total::text').get()
    price_per_meter = page.css('.unitPrice span::text').get()
    house_type = page.css('.room div.mainInfo::text').get()
    # The selectors below are positional (nth-child), so they depend on the
    # order of the <li> items in the page's ".base" info list.
    area = page.css('.base > div.content > ul > li:nth-child(3)::text').get()
    direction = page.css('.base > div.content > ul > li:nth-child(7)::text').get()
    furnished = page.css('.base > div.content > ul > li:nth-child(9) > span::text').get()
    lift = page.css('.base > div.content > ul > li:nth-child(12) > span::text').get()
    floors = page.css('.base > div.content > ul > li:nth-child(2)::text').get()
    year = page.css('.subInfo.noHidden::text').get()
    return [title, price, price_per_meter, house_type, area, direction,
            furnished, lift, floors, year]


def spider(max_pages=None):
    """Crawl the ershoufang listing pages on sz.lianjia.com into the CSV file.

    Walks the paginated index ``https://sz.lianjia.com/ershoufang/pg<N>/``
    starting at page 1, follows every listing link found on each index page,
    extracts the listing fields, prints them ``|``-separated and passes them
    to ``write_csv``.

    Args:
        max_pages: Optional upper bound on the number of index pages to
            visit. ``None`` (the default) keeps paging until an index page
            yields no listing link that has not already been visited.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
    }

    visited = set()
    page_no = 1
    while max_pages is None or page_no <= max_pages:
        time.sleep(1)  # pause between index-page requests
        url = f'https://sz.lianjia.com/ershoufang/pg{page_no}/'

        # Send the request; the timeout keeps a stalled connection from
        # hanging the crawl indefinitely.
        response = requests.get(url=url, headers=headers, timeout=10)

        # Parse the page source and collect the detail-page links.
        selector = parsel.Selector(response.text)
        links = selector.css('div.leftContent li div.info.clear div.title a::attr(href)').getall()

        # Stop when the index has nothing new to offer: either the page is
        # empty or it only repeats links that were already processed.
        fresh_links = [link for link in links if link not in visited]
        if not fresh_links:
            break
        visited.update(fresh_links)

        for link in fresh_links:
            try:
                html_data = requests.get(url=link, headers=headers, timeout=10).text
                row = _extract_listing(parsel.Selector(html_data))
                print(*row, sep='|')
                write_csv(row)
            except Exception as exc:
                # Best-effort crawl: one bad listing must not abort the run,
                # but the failure is reported instead of silently dropped.
                print(f'skipped {link}: {exc!r}')

        page_no += 1

def write_csv(data_row, path="house1.csv"):
    """Append a single row to a CSV file.

    Args:
        data_row: Iterable of field values for one row; passed unchanged to
            ``csv.writer(...).writerow``.
        path: Destination file. Defaults to ``"house1.csv"``, the file the
            script writes its header row to.
    """
    # Explicit UTF-8 so non-ASCII text encodes the same way on every
    # platform instead of depending on the locale's default encoding.
    # newline='' lets the csv module control line endings itself.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(data_row)


if __name__ == '__main__':
    # Column names, in the same order as the row that spider() assembles.
    name = ("title", "price", "price_per_meter", "Type", "area", "direction", "furnished", "lift", "floors", "year")
    path = "house1.csv"
    with open(path, 'a', newline='', encoding='utf-8') as f:
        # A stream opened in append mode starts at the end of the file, so a
        # position of 0 means the file is new or empty. Writing the header
        # only then keeps re-runs from inserting header lines between rows.
        if f.tell() == 0:
            csv.writer(f).writerow(name)
    spider()