import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy import Request
class data(scrapy.Spider):
    """Spider that scrapes property listings from nigeriapropertycentre.com.

    parse() walks the listing pages (following pagination), extracts the
    card-level fields, and dispatches one request per listing to
    parse_details(), which yields the final item.
    """
    name = 'data'
    start_urls = ['https://nigeriapropertycentre.com/for-sale/']

    def parse(self, response):
        """Parse one listing page: yield a detail request per property card,
        then follow the pagination link until there are no more pages."""
        cards = response.xpath("//div[@class='wp-block property list']")
        for card in cards:
            # BUG FIX: queries must be relative (".//") — an absolute "//"
            # XPath inside the loop searches the whole document, so every
            # card would return the *first* card's values.
            title = card.xpath(".//a/h4/text()").get()
            address = card.xpath(".//address/*[i]/text()").get()
            link = card.xpath(".//div[@class='description hidden-xs']/a/@href").get()
            if link is None:
                # Request(None) raises; skip cards without a detail link.
                continue
            # BUG FIX: meta keys must match exactly what parse_details reads
            # back (original stored 'Tilte'/'link' but read 'Title'/'Link',
            # a guaranteed KeyError on the first detail page).
            yield Request(
                response.urljoin(link),  # hrefs may be relative; make absolute
                callback=self.parse_details,
                meta={'Title': title, 'Address': address, 'Link': link},
            )
        # Pagination: follow the "next" link until no more pages remain.
        next_page = response.css('li.next a::attr(href)').get()
        if next_page is not None:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)

    def parse_details(self, response):
        """Parse a property detail page and yield the scraped item."""
        # BUG FIX: .getall() returns a list, which has no .replace() —
        # this is the AttributeError in the reported traceback. Clean each
        # paragraph individually, then join into a single description string.
        paragraphs = response.xpath("//p/text()").getall()
        description = ' '.join(p.replace('\n', '').strip() for p in paragraphs)
        yield {
            'Title': response.meta['Title'],  # fixed 'Tilte' typo in output key
            'Address': response.meta['Address'],
            'Description': description,
            'link': response.meta['Link'],
        }
# Script entry point: launch the spider with an in-process crawler when
# this module is executed directly (rather than via the scrapy CLI).
if __name__ == "__main__":
    crawler = CrawlerProcess()
    crawler.crawl(data)
    crawler.start()  # blocks until the crawl finishes
# --- Reviewer note (translated from Chinese): the author appended the runtime
# error below, asking for advice. Reconstructed traceback (truncated in the
# original paste):
#
#   Traceback (most recent call last):
#     File "C:\Users\Franklin\Desktop\Coding env\Lib\site-packages\scrapy\utils\defer.py",
#       line 257, in iter_errback: yield next(it)
#     File "C:\Users\Franklin\Desktop\Coding env\Lib\site-packages\scrapy\utils\python.py",
#       line 312, in __next__: return next(self.data)
#     File "C:\Users\Franklin\Desktop\Coding env\Lib\site-packages\scrap... (cut off)
#
# Root cause: parse_details called .replace() on the list returned by
# .getall() (lists have no .replace), and the meta keys written in parse did
# not match the keys read in parse_details.