我正在尝试使用 Pandas 将数据从 parquet 文件加载到 SQL Server 表。
这是我遇到的错误
cursor.execute()
。
Traceback (most recent call last): File "C:\\Users\\path\\redshift.py", line 138, in <module> cursor.execute(insert_query, row) File "src\\pymssql\\_pymssql.pyx", line 447, in pymssql._pymssql.Cursor.execute File "src\\pymssql\\_mssql.pyx", line 1125, in pymssql._mssql.MSSQLConnection.execute_query File "src\\pymssql\\_mssql.pyx", line 1156, in pymssql._mssql.MSSQLConnection.execute_query File "src\\pymssql\\_mssql.pyx", line 1273, in pymssql._mssql.MSSQLConnection.format_and_run_query File "src\\pymssql\\_mssql.pyx", line 1295, in pymssql._mssql.MSSQLConnection.format_sql_command File "src\\pymssql\\_mssql.pyx", line 2084, in pymssql._mssql._substitute_params File "src\\pymssql\\_mssql.pyx", line 2069, in pymssql._mssql._quote_data File "src\\pymssql\\_mssql.pyx", line 1986, in pymssql._mssql._quote_simple_value File "C:\Users\pwcsreddy\AppData\Roaming\Python\Python312\site-packages\pandas\core\generic.py", line 1577, in __nonzero__ raise ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
该错误看起来与 Pandas 有关,但它是在插入表时抛出错误,而不是在执行 Pandas 代码时抛出错误。
这是我的代码。
import pymssql
import pandas as pd
import duckdb
csv_file_path = "C:\\path\\pp2.snappy.parquet"
df = pd.read_parquet(csv_file_path)
columns = df.columns.tolist()
df = df[columns]
df = df.fillna(0)
column_names = ', '.join(columns)
placeholders = ', '.join(['?'] * len(columns))
conn = pymssql.connect()
cursor = conn.cursor()
datetime_columns = []
df2 = duckdb.sql("describe select * from read_parquet('pp2.snappy.parquet')").df()
for i, j in zip(df2["column_name"], df2["column_type"]):
if "TIMESTAMP" in j:
datetime_columns.append(i)
for index, row in df[columns].iterrows():
insert_query = f"""
INSERT INTO dbo.table_name ({column_names})
VALUES ( {placeholders} );
"""
for dt_col in datetime_columns:
value = getattr(row, dt_col)
if not pd.isna(value):
setattr(row, dt_col, pd.to_datetime(value))
else:
setattr(row, dt_col, 0)
print(insert_query, dict(row))
cursor.execute(insert_query, row)
conn.commit()
这是我得到的 print 语句的输出。
INSERT INTO dbo.tablename (id, contact, createuserid, publicid, active, beanversion, archivepartition, retired, createtime, lastupdatetime, account, updateuserid, updatetime, subtype, temporarylastupdatetime, frozensetid)
VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );
{'id': 501, 'contact': 503, 'createuserid': 3, 'publicid': 'pc:SCPdF-JQE54GgYqF5JYxm', 'active': True, 'beanversion': 0, 'archivepartition': 0.0, 'retired': 0, 'createtime': Timestamp('2024-08-05 08:28:34.417000+0000', tz='UTC'), 'lastupdatetime': Timestamp('2024-08-05 08:28:34.417000+0000', tz='UTC'), 'account': 301, 'updateuserid': 3, 'updatetime': Timestamp('2024-08-05 08:28:34.417000+0000', tz='UTC'), 'subtype': 1, 'temporarylastupdatetime': Timestamp('1970-01-01 00:00:00'), 'frozensetid': 0.0}
将字典传递给
.execute
函数:
cursor.execute(insert_query, dict(row))
(与你的
print
相同)