我使用 scrapy 从亚马逊废弃产品,但它只返回每个页面的第一个产品并转到下一页我的代码有什么问题
import scrapy
from ..items import AmazonItem
class AmazonSpSpider(scrapy.Spider):
name = "amazon_sp"
allowed_domains = ["www.amazon.com"]
start_urls = ["https://www.amazon.com/s?k=laptop"]
page_number = 1
def parse(self, response):
items = response.css('h2 a.a-text-normal::attr(href)').getall()
print(items)
for result in items:
item_link = "https://www.amazon.com/" + result
print("Hi I am in loop")
yield scrapy.Request(url=item_link, callback=self.parse_item)
next_page = f"https://www.amazon.com/s?k=laptop&page={AmazonSpSpider.page_number}"
print('I am not in loop')
AmazonSpSpider.page_number += 1
yield response.follow(url=next_page, callback=self.parse)
def parse_item(self, response):
item = AmazonItem()
item['title'] = response.css('#productTitle::text').get()
item['image_link'] = response.css('imgTagWrapperId img::attr(src)').get()
item['price'] = response.css('.a-price span::text').get()
item['brand'] = response.css('.po-brand .po-break-word::text').get()
item['model'] = response.css('.po-model_name .po-break-word::text').get()
item['rating'] = response.css('#acrPopover .a-color-base::text').get()
yield item
请更改
items = response.css('h2 a.a-text-normal::attr(href)').getall()
到
items = response.css('h2 a.a-text-normal::attr(href)')
然后它将处理您那里的项目。你就可以走了。请更新您的选择器。