I'm trying to send a CSV file containing a large number of columns to a MySQL database. After running my code I get this error message:
Traceback (most recent call last):
File "/home/dan-brown/apollo/src/cicflowmeter/datbase_handling.py", line 24, in <module>
db_post()
File "/home/dan-brown/apollo/src/cicflowmeter/datbase_handling.py", line 22, in db_post
mycursor.execute(query, (row,))
File "/home/dan-brown/PythonVenv/my-venv/lib/python3.12/site-packages/mysql/connector/cursor_cext.py", line 350, in execute
stmt = RE_PY_PARAM.sub(psub, stmt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/dan-brown/PythonVenv/my-venv/lib/python3.12/site-packages/mysql/connector/cursor_cext.py", line 115, in __call__
raise ProgrammingError( mysql.connector.errors.ProgrammingError: Not enough parameters for the SQL statement
As I understand it, this means that the number of values my query asks for doesn't match the number of values read from the CSV file, but I've checked several times and keep finding the same 84 features. Here is the code:
import mysql.connector
#connect to database
db = mysql.connector.connect(
host="localhost",
user="root",
passwd="root",
database="Apollo"
)
#active user
mycursor = db.cursor()
#setup table. Should be called on install
def db_setup():
mycursor.execute("CREATE TABLE NetworkLogs (id INT AUTO_INCREMENT PRIMARY KEY, src_ip VARCHAR(15), dst_ip VARCHAR(15), src_port INT, dst_port INT, src_mac VARCHAR(17), dst_mac VARCHAR(17), protocol INT, timestamp DATETIME, flow_duration FLOAT, flow_byts_s FLOAT, flow_pkts_s FLOAT, fwd_pkts_s FLOAT, bwd_pkts_s FLOAT, tot_fwd_pkts INT, tot_bwd_pkts INT, totlen_fwd_pkts INT, totlen_bwd_pkts INT, fwd_pkt_len_max FLOAT, fwd_pkt_len_min FLOAT, fwd_pkt_len_mean FLOAT, fwd_pkt_len_std FLOAT, bwd_pkt_len_max FLOAT, bwd_pkt_len_min FLOAT, bwd_pkt_len_mean FLOAT, bwd_pkt_len_std FLOAT, pkt_len_max FLOAT, pkt_len_min FLOAT, pkt_len_mean FLOAT, pkt_len_std FLOAT, pkt_len_var FLOAT, fwd_header_len INT, bwd_header_len INT, fwd_seg_size_min INT, fwd_act_data_pkts INT, flow_iat_mean FLOAT, flow_iat_max FLOAT, flow_iat_min FLOAT, flow_iat_std FLOAT, fwd_iat_tot FLOAT, fwd_iat_max FLOAT, fwd_iat_min FLOAT, fwd_iat_mean FLOAT, fwd_iat_std FLOAT, bwd_iat_tot FLOAT, bwd_iat_max FLOAT, bwd_iat_min FLOAT, bwd_iat_mean FLOAT, bwd_iat_std FLOAT, fwd_psh_flags INT, bwd_psh_flags INT, fwd_urg_flags INT, bwd_urg_flags INT, fin_flag_cnt INT, syn_flag_cnt INT, rst_flag_cnt INT, psh_flag_cnt INT, ack_flag_cnt INT, urg_flag_cnt INT, ece_flag_cnt INT, down_up_ratio FLOAT, pkt_size_avg FLOAT, init_fwd_win_byts INT, init_bwd_win_byts INT, active_max FLOAT, active_min FLOAT, active_mean FLOAT, active_std FLOAT, idle_max FLOAT, idle_min FLOAT, idle_mean FLOAT, idle_std FLOAT, fwd_byts_b_avg FLOAT, fwd_pkts_b_avg FLOAT, bwd_byts_b_avg FLOAT, bwd_pkts_b_avg FLOAT, fwd_blk_rate_avg FLOAT, bwd_blk_rate_avg FLOAT, fwd_seg_size_avg FLOAT, bwd_seg_size_avg FLOAT, cwe_flag_count INT, subflow_fwd_pkts INT, subflow_bwd_pkts INT, subflow_fwd_byts INT, subflow_bwd_byts INT)")
#inserts recorded flows to db
def db_post():
    with open('src/cicflowmeter/test1.csv', 'r') as f:
        for row in f:
            query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"
            mycursor.execute(query, (row,))
        db.commit()
db_post()
Here is the CSV file being read:
192.168.1.16,192.168.1.255,59222,15600,64:e7:d8:33:f6:76,ff:ff:ff:ff:ff:ff,17,2024-10-05 18:07:59,0.0,0,0,0,0,1,0,77,0,77.0,77.0,77.0,0.0,0.0,0.0,0.0,0.0,77,77,77.0,0.0,0.0,8,0,8,1,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0,0,0,0,0,0,0.0,77.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,77.0,0.0,0,1,0,77,0
I've gone through a number of forum posts but haven't found an answer yet.
Apologies if the layout is off or if this is a simple mistake; I'm not an experienced programmer, and any help would be greatly appreciated.
Thank you.
There are several problems:

- The file needs to be parsed with csv.reader() so that each line is split into a list of fields; iterating over the file object directly gives you each line as a single string (see the short sketch after the corrected function below). This also means adding import csv at the top of the file.
- row is wrapped in a tuple, so it counts as just one parameter value. Just use row by itself.
- The %s placeholders should not be quoted. Quoting makes them literals, and they will not be replaced by the parameter values.

#inserts recorded flows to db
def db_post():
query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" with open('src/cicflowmeter/test1.csv', 'r') as f:
csv_f = csv.reader(f)
for row in csv_f:
mycursor.execute(query, row)
db.commit()
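To make the first two points concrete, here is a small illustrative sketch (not part of the fix; it uses a shortened sample row) showing the difference between iterating the file directly and parsing it with csv.reader():

# Illustrative sketch only: why the original loop supplied a single parameter per row.
import csv
import io

line = "192.168.1.16,192.168.1.255,59222,15600\n"   # shortened sample row for illustration

# Iterating the open file yields each line as one string; wrapping it in a
# tuple therefore gives exactly one parameter value, while the query expects 84.
params_from_file = (line,)
print(len(params_from_file))   # 1

# csv.reader splits the same line into one field per column, which is what
# cursor.execute() needs to fill its %s placeholders.
params_from_csv = next(csv.reader(io.StringIO(line)))
print(len(params_from_csv))    # 4 for this shortened row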
There is also no need to reassign query on every pass through the loop; it never changes.
You can also do this more efficiently with mycursor.executemany(). It takes a two-dimensional iterable and does the looping itself, batching multiple rows to reduce the number of queries sent to the database.
#inserts recorded flows to db
def db_post():
query = "INSERT INTO NetworkLogs (src_ip, dst_ip, src_port, dst_port, src_mac, dst_mac, protocol, timestamp, flow_duration, flow_byts_s, flow_pkts_s, fwd_pkts_s, bwd_pkts_s, tot_fwd_pkts, tot_bwd_pkts, totlen_fwd_pkts, totlen_bwd_pkts, fwd_pkt_len_max, fwd_pkt_len_min, fwd_pkt_len_mean, fwd_pkt_len_std, bwd_pkt_len_max, bwd_pkt_len_min, bwd_pkt_len_mean, bwd_pkt_len_std, pkt_len_max, pkt_len_min, pkt_len_mean, pkt_len_std, pkt_len_var, fwd_header_len, bwd_header_len, fwd_seg_size_min, fwd_act_data_pkts, flow_iat_mean, flow_iat_max, flow_iat_min, flow_iat_std, fwd_iat_tot, fwd_iat_max, fwd_iat_min, fwd_iat_mean, fwd_iat_std, bwd_iat_tot, bwd_iat_max, bwd_iat_min, bwd_iat_mean, bwd_iat_std, fwd_psh_flags, bwd_psh_flags, fwd_urg_flags, bwd_urg_flags, fin_flag_cnt, syn_flag_cnt, rst_flag_cnt, psh_flag_cnt, ack_flag_cnt, urg_flag_cnt, ece_flag_cnt, down_up_ratio, pkt_size_avg, init_fwd_win_byts, init_bwd_win_byts, active_max, active_min, active_mean, active_std, idle_max, idle_min, idle_mean, idle_std, fwd_byts_b_avg, fwd_pkts_b_avg, bwd_byts_b_avg, bwd_pkts_b_avg, fwd_blk_rate_avg, bwd_blk_rate_avg, fwd_seg_size_avg, bwd_seg_size_avg, cwe_flag_count, subflow_fwd_pkts, subflow_bwd_pkts, subflow_fwd_byts, subflow_bwd_byts) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
with open('src/cicflowmeter/test1.csv', 'r') as f:
csv_f = csv.reader(f)
mycursor.executemany(query, csv_f)
db.commit()
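As a quick sanity check (just a hypothetical usage sketch, assuming the NetworkLogs table created by db_setup() already exists), you could call db_post() and then count the rows that were inserted:

# Hypothetical check: how many flows ended up in the table.
db_post()
mycursor.execute("SELECT COUNT(*) FROM NetworkLogs")
print(mycursor.fetchone()[0])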