每当我使用
parse_quote
函数时,什么也没有发生,我只是得到一个空白的 CSV 文件,但如果我将所有报价项放入 parse
函数中,它就能够抓取报价。我做错了什么?
def parse(self, response):
#scraping only the page
quotes = response.xpath("//div[@class='quote']")
for quote in quotes:
self.parse_quotes(quote)
#moving to the next page
next_page = response.xpath("//li[@class='next']/a/@href").get()
if next_page is not None:
next_page_url = "https://quotes.toscrape.com/"+next_page
#going to next page and then scraping the info of the page
yield response.follow(next_page_url,callback=self.parse)
# make a function to yield all the fields
def parse_quotes(self, response):
quote_item = QuoteItems()
quote_item['Quote'] = response.xpath(".//span[@itemprop='text']/text()").get(),
quote_item['Author'] = response.xpath(".//span//small[@class='author']/text()").get(),
quote_item['Tags'] = response.xpath(".//div[@class='tags']//a[@class='tag']/text()").getall(),
yield quote_item
您需要退回该物品并交出它。
import scrapy
class QuoteItems(scrapy.Item):
Quote = scrapy.Field()
Author = scrapy.Field()
Tags = scrapy.Field()
class ExampleSpider(scrapy.Spider):
name = "example_spider"
start_urls = ["https://quotes.toscrape.com/"]
def parse(self, response):
# scraping only the page
quotes = response.xpath("//div[@class='quote']")
for quote in quotes:
yield self.parse_quotes(quote)
# moving to the next page
next_page = response.xpath("//li[@class='next']/a/@href").get()
if next_page:
next_page_url = "https://quotes.toscrape.com/" + next_page
# going to next page and then scraping the info of the page
yield response.follow(next_page_url)
def parse_quotes(self, response):
quote_item = QuoteItems()
quote_item['Quote'] = response.xpath(".//span[@itemprop='text']/text()").get()
quote_item['Author'] = response.xpath(".//span//small[@class='author']/text()").get()
quote_item['Tags'] = response.xpath(".//div[@class='tags']//a[@class='tag']/text()").getall()
return quote_item