![在这里插入图片描述](https://img-blog.csdnimg.cn/202012052332198.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L01pUmVtZW1iZXI=,size_16,color_FFFFFF,t_70)
import scrapy
import re
from scrapy import Request
from urllib import parse
from SpiderDemo.items import SpiderdemoItem
class CeicSpider(scrapy.Spider):
    """Crawl the CEIC quick-search listing and scrape every linked detail page.

    ``parse`` walks the paginated listing and schedules one request per
    detail link; ``parse_detail`` converts a detail page into a
    ``SpiderdemoItem`` with the fields ``time``, ``latitude``,
    ``longitude``, ``depth``, ``level`` and ``area``.
    """

    name = 'ceic'
    # allowed_domains takes bare domain names, not URLs: an entry carrying a
    # path/query never matches a request host, which forces every request to
    # bypass filtering with dont_filter=True.
    allowed_domains = ['www.ceic.ac.cn']
    # NOTE(review): the trailing "/" ends up inside the query value
    # ("time=6/"); kept as-is because the server's handling of it cannot be
    # confirmed from here -- verify and drop it if it is unintended.
    start_urls = ['http://www.ceic.ac.cn/speedsearch?time=6/']

    # (item field, index into the valign="middle" text nodes, unit suffix to
    # remove). Only the odd positions are read; presumably the even ones are
    # the row labels -- confirm against the live page.
    _DETAIL_FIELDS = (
        ("time", 1, ""),
        ("latitude", 3, "°"),
        ("longitude", 5, "°"),
        ("depth", 7, "千米"),
        ("level", 9, ""),
        ("area", 11, ""),
    )

    def parse(self, response):
        """Yield a request per detail link, then a request for the next page.

        Requests go through Scrapy's normal duplicate filter, so a paging
        link that points back at an already visited page ends the crawl
        instead of looping forever.
        """
        detail_links = response.xpath('//*[@align="left"]/a/@href').extract()
        for link in detail_links:
            yield Request(
                url=parse.urljoin(response.url, link),
                callback=self.parse_detail,
            )

        # extract_first() returns None when the paging element is missing,
        # whereas indexing extract()[0] would raise IndexError.
        next_url = response.xpath(
            '//*[@id="paging"]/div/div/ul/li[last()-1]/a/@href'
        ).extract_first()
        if next_url:
            yield Request(
                url=parse.urljoin(response.url, next_url),
                callback=self.parse,
            )

    def parse_detail(self, response):
        """Build a ``SpiderdemoItem`` from one detail page.

        Returns:
            The populated item, or ``None`` (after logging a warning) when
            the page has fewer text nodes than the expected layout requires.
        """
        # Evaluate the XPath once; every field is read from this same list.
        cells = response.xpath('//*[@valign="middle"]/text()').extract()

        highest_index = max(index for _, index, _ in self._DETAIL_FIELDS)
        if len(cells) <= highest_index:
            self.logger.warning(
                "Unexpected detail page layout (%d text nodes) at %s",
                len(cells),
                response.url,
            )
            return None

        spider_item = SpiderdemoItem()
        for field, index, unit in self._DETAIL_FIELDS:
            value = cells[index]
            if unit:
                value = value.replace(unit, "")
            # Strip every field, not just "time", so stored values carry no
            # stray whitespace from the markup.
            spider_item[field] = value.strip()
        return spider_item
# Name: Eamonze
# Time: 2020/12/5