Scrapy 函数未调用

问题描述 投票:0回答:1

每当我使用

parse_quotes
函数时,什么也没有发生,我只是得到一个空白的 CSV 文件,但如果我把填充 quote item 各字段的代码直接放入
parse
函数中,它就能够抓取报价。我做错了什么?

def parse(self, response):
          """Run parse_quotes on every quote block of the page, then request the next page.

          NOTE(review): as posted, this method never emits an item -- see the
          comment inside the loop below.
          """
         
         # select every quote container on the current page
          
          quotes  = response.xpath("//div[@class='quote']")
          
          for quote in quotes:
                # NOTE(review): parse_quotes contains a `yield`, so this call only
                # creates a generator object. Nothing iterates it or yields it,
                # so its body never runs and no item leaves this method.
                self.parse_quotes(quote)
          

          # look up the href of the "next" pagination link (None when absent)
          next_page = response.xpath("//li[@class='next']/a/@href").get()
         

          if next_page is not None:
             next_page_url = "https://quotes.toscrape.com/"+next_page
             
             # request the next page and handle it with this same method
             yield response.follow(next_page_url,callback=self.parse)

     
     
     # make a function to yield all the fields

     def parse_quotes(self, response):
        """Fill a QuoteItems from one quote node and yield it.

        Despite its name, `response` is whatever the caller passes in; the
        XPaths below start with "." and are therefore evaluated relative to
        that node.

        NOTE(review): the `yield` at the end makes this a generator function,
        so the body only executes when the caller iterates the returned
        generator.
        """
       
        quote_item = QuoteItems()

        # NOTE(review): each of the three assignments below ends with a trailing
        # comma, which makes the stored value a one-element tuple wrapping the
        # .get()/.getall() result rather than the result itself.
        quote_item['Quote']  = response.xpath(".//span[@itemprop='text']/text()").get(),
        quote_item['Author'] = response.xpath(".//span//small[@class='author']/text()").get(),
        quote_item['Tags']   = response.xpath(".//div[@class='tags']//a[@class='tag']/text()").getall(),
        
        yield quote_item
web-scraping scrapy scrapy-splash
1个回答
0
投票

您需要在 parse_quotes 中用 return 返回该 item,并在 parse 中用 yield 将它产出。

import scrapy


class QuoteItems(scrapy.Item):
    """Item holding the three values the spider extracts for each quote."""

    Quote = scrapy.Field()  # text of the quote
    Author = scrapy.Field()  # author name
    Tags = scrapy.Field()  # list of tag strings (filled from .getall())


class ExampleSpider(scrapy.Spider):
    """Crawl quotes.toscrape.com page by page, emitting one item per quote."""

    name = "example_spider"
    start_urls = ["https://quotes.toscrape.com/"]

    def parse(self, response):
        """Yield a ``QuoteItems`` for each quote on the page, then follow "next".

        Args:
            response: The downloaded page.

        Yields:
            One item per ``div.quote`` element, followed by a request for the
            next page when a "next" link is present.
        """
        for quote in response.xpath("//div[@class='quote']"):
            # parse_quotes *returns* a finished item; yielding it here is what
            # actually hands it to Scrapy. Calling the helper without yielding
            # its result would silently drop the item.
            yield self.parse_quotes(quote)

        next_page = response.xpath("//li[@class='next']/a/@href").get()
        if next_page:
            # response.follow resolves relative hrefs against response.url, so
            # the site root must not be prepended by hand (doing so yields a
            # "//" path when the href is site-relative, e.g. "/page/2/").
            yield response.follow(next_page, callback=self.parse)

    def parse_quotes(self, response):
        """Build a ``QuoteItems`` from a single quote node.

        Args:
            response: Despite the name, the selector for one ``div.quote``
                element as passed by ``parse``; the XPaths are relative to it.

        Returns:
            The populated item. This is a plain function (``return``, not
            ``yield``), so the caller gets the item itself, not a generator.
        """
        quote_item = QuoteItems()

        quote_item['Quote'] = response.xpath(".//span[@itemprop='text']/text()").get()
        quote_item['Author'] = response.xpath(".//span//small[@class='author']/text()").get()
        quote_item['Tags'] = response.xpath(".//div[@class='tags']//a[@class='tag']/text()").getall()

        return quote_item
© www.soinside.com 2019 - 2024. All rights reserved.