当我尝试将项目从 Scrapy 写入 DBeaver 中的 postgres 数据库时。它只是第一次创建表,然后将写入Postgres。当我在抓取时导出 JSON 文件时,代码工作正常。
我正在使用这个教程。
这里是pipelines.py代码
from itemadapter import ItemAdapter
import psycopg2
class FigurePipeline:
def __init__(self):
## Connection Details
hostname = 'localhost'
username = 'hoang'
password = '' # your password
database = 'scraping'
## Create/Connect to database
self.connection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database)
## Create cursor, used to execute commands
self.cur = self.connection.cursor()
## Create quotes table if none exists
self.cur.execute("""
CREATE TABLE IF NOT EXISTS figure (
id SERIAL PRIMARY KEY,
url VARCHAR(255) NOT NULL,
name VARCHAR(255) NOT NULL,
price INTEGER,
images TEXT[] NOT NULL,
hash VARCHAR(255) NOT NULL,
date TIMESTAMP NOT NULL
)
""")
self.connection.commit()
def process_item(self, item, spider):
## Check to see if name and hash is already in database
check_query = "SELECT * FROM figure WHERE name = %s AND hash = %s"
try:
self.cur.execute(check_query, (item['name'],item['hash']))
result = self.cur.fetchone()
if result:
# The item is already in the database, do not save it again
self.logger.debug("Item already in database, skipping...")
else:
try:
## Define insert statement
self.cur.execute("""
INSERT INTO figure (url, name, price, images, hash, date)
VALUES (%s, %s, %s, %s, %s, %s)
ON CONFLICT (name) DO UPDATE
SET price = EXCLUDED.price, hash = EXCLUDED.hash, date = EXCLUDED.date
""", (
item["url"],
item["name"],
item["price"],
str(item["images"]),
item["hash"],
item["date"]
))
## Execute insert of data into database
self.connection.commit()
except Exception as e:
self.connection.rollback()
except Exception as e:
self.connection.rollback()
## Execute insert of data into database
self.connection.commit()
return item
def close_spider(self, spider):
## Close cursor & connection to database
self.cur.close()
self.connection.close()
起初,init 最后没有 self.connection.commit(),所以它甚至没有创建表。我添加它以便创建表。我希望记录下从 scrapy 中抓取的内容来填写表格