我正在尝试创建一个脚本来读取 csv 文件并更新我的本地 postgreSQL 表。 这是我要上传的数据。
import psycopg2 as pg
from pandas import read_excel,DataFrame
def execute_query(connection,cursor,query:str):
cursor.execute(query)
connection.commit()
def create_conn():
try:
connection = pg.connect("host='localhost' port='5432' dbname='abc' user='abc' password='abc'")
cursor = connection.cursor()
return connection,cursor
except:
print("Connection failed")
def read_sql_file(filename):
with open(filename, 'r') as file:
sql_queries = file.read()
return sql_queries
def import_csv_data(connection, cursor, csv_file,tablename):
try:
df = read_excel(csv_file)
for i,row in df.iterrows():
values = ",".join(map(str,row.values))
query = f"INSERT INTO {tablename} VALUES {values};"
execute_query(connection,cursor,query)
except Exception as e:
print(f"Error: {e}")
def close_connection(connection,cursor):
connection.close()
cursor.close()
if __name__ == "__main__":
conn, curr = create_conn()
if conn and curr:
## Create table
# sql_file = "player_behaviour_create.sql"
# query = read_sql_file(sql_file)
# execute_query(conn,curr,query)
## Insert Data
import_csv_data(conn,curr,r"Data\data_sample_100_rows.xlsx","player_behaviour")
close_connection(conn,curr)
创建表的 SQL 查询:
CREATE TABLE player_behaviour (
PlayerID INT PRIMARY KEY,
Age INT,
Gender VARCHAR(10),
Location VARCHAR(50),
GameID INT,
PlayTime FLOAT,
FavoriteGame VARCHAR(50),
SessionID BIGINT,
CampaignID INT,
AdsSeen INT,
PurchasesMade INT,
EngagementLevel VARCHAR(10)
);
我怀疑是因为我将所有内容都转换为字符串(import_csv_data),所以发生了数据类型不匹配,但我不确定。如果是这样,我该怎么办? 我还可以使用一些技巧来优化这段代码。
您插入的每一行都必须用括号括起来。
insert into table(a,b,c)
values (1,2,3);
请参阅 SQL 教程:INSERT。
注意,您可以在一次插入中插入多行。这使得批量加载速度显着加快。
insert into table(a,b,c) values
(1,2,3),
(4,5,6),
(7,8,9);