在本地测试应用程序(Java、Spring Boot、Hibernate,数据库为 SQL Server)时,我注意到出现了大量死锁;测试期间应用程序和数据库都运行在我的计算机上。在预发布(staging)或生产环境中不会发生死锁(但是我的计算机比我计算机上的容器中运行的服务器和数据库强大得多)。
我正在测试应用程序中执行批量导入的特定部分:基本上,它从一张表中读取代表合同事件和项目事件的行(一个合同包含多个项目),并将它们导入 contract 和 item 两张表,以最终状态存储。
事件由多个线程并行消费,但一个线程仅处理与单个合同相关的事件,因此不同的线程不会操作 contract 表和 item 表中的相同行。
似乎是代码中某个特定部分(专门处理某种类型的合同)触发了死锁。我从 SQL Server 收集了死锁报告,以及包含事务日志和 Hibernate 日志的应用程序跟踪,但我对死锁并不熟悉,很难弄清楚到底发生了什么。
相关表DDL:
-- Select the INVOICE database for the batch that follows.
USE INVOICE
GO
-- dbo.contract: primary key on `reference`; every bit flag is NOT NULL with a default.
CREATE TABLE dbo.contract
(
    reference             varchar(16)  NOT NULL PRIMARY KEY,
    client_code           varchar(64)  NOT NULL,
    created_date          date         NOT NULL,
    agency_reference      varchar(32),
    channel               varchar(32),
    external_code         varchar(64),
    new_client            bit          NOT NULL DEFAULT 1,
    owner_type            varchar(32),
    status                varchar(32),
    target_period         varchar(7),
    type                  varchar(32),
    created_by_isilis     bit          NOT NULL DEFAULT 1,
    partial_mobility      bit          NOT NULL DEFAULT 0,
    new_iban              varchar(36),
    new_bank_account_name varchar(70),
    old_iban              varchar(70),
    emitter_iban          varchar(70),
    owner_name            varchar(110),
    signed_date           date,
    already_invoiced      bit          NOT NULL DEFAULT 0,
    is_dsp2               bit          NOT NULL DEFAULT 0,
    dsp2_status           varchar(36)
)
GO
-- Two secondary indexes on dbo.contract. The key of the second begins with the
-- same (target_period, client_code) pair as the first and appends `type`.
CREATE INDEX idx_contract_period_client
    ON dbo.contract (target_period, client_code)
GO
CREATE INDEX idx_contract_period_client_type
    ON dbo.contract (target_period, client_code, type)
GO
-- Select the INVOICE database for the batch that follows.
USE INVOICE
GO
-- dbo.item: primary key on `reference`; `contract_reference` is a required column
-- but no foreign-key constraint is declared in this definition.
CREATE TABLE dbo.item
(
    reference           varchar(16)  NOT NULL PRIMARY KEY,
    contract_reference  varchar(16)  NOT NULL,
    name                varchar(256),
    created_date        date         NOT NULL,
    sent_date           date,
    status              varchar(32),
    target_period       varchar(7),
    finalised_by_isilis bit          NOT NULL DEFAULT 0,
    physical            bit          NOT NULL DEFAULT 0,
    small               bit          NOT NULL DEFAULT 0,
    has_email           bit          NOT NULL DEFAULT 0,
    ics                 varchar(64),
    is_dsp2             bit          NOT NULL DEFAULT 0
)
GO
-- Secondary index on dbo.item keyed by (contract_reference, reference).
CREATE INDEX ITEM_CONTRACT_REFERENCE
    ON dbo.item (contract_reference, reference)
GO
SQL Server 死锁报告:
<event name="xml_deadlock_report" package="sqlserver" timestamp="2024-10-10T12:59:49.190Z">
<data name="xml_report">
<value>
<deadlock>
<victim-list>
<victimProcess id="processe00058ca8"/>
</victim-list>
<process-list>
<process id="processe00058ca8" taskpriority="0" logused="0" waitresource="PAGE: 11:1:14032 "
waittime="4961" ownerId="7994918" transactionname="implicit_transaction"
lasttranstarted="2024-10-10T12:59:44.220" XDES="0xe00584470" lockMode="S" schedulerid="1"
kpid="832" status="suspended" spid="78" sbid="0" ecid="0" priority="0" trancount="1"
lastbatchstarted="2024-10-10T12:59:44.227" lastbatchcompleted="2024-10-10T12:59:44.227"
lastattention="1900-01-01T00:00:00.227" clientapp="Microsoft JDBC Driver for SQL Server"
hostname="benoit-XPS" hostpid="422222" loginname="sa" isolationlevel="read committed (2)"
xactid="7994918" currentdb="11" currentdbname="INVOICE" lockTimeout="4294967295"
clientoption1="671088672" clientoption2="128058">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="40" stmtend="3400"
sqlhandle="0x0200000017f60a2aa415215a18d28099390faa3cb6ef87490000000000000000000000000000000000000000">
unknown
</frame>
<frame procname="unknown" line="1"
sqlhandle="0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000">
unknown
</frame>
</executionStack>
<inputbuf>
(@P0 nvarchar(4000))select contracten0_.reference as referenc1_1_1_,
contracten0_.agency_reference as agency_r2_1_1_, contracten0_.already_invoiced as
already_3_1_1_, contracten0_.channel as channel4_1_1_, contracten0_.client_code as
client_c5_1_1_, contracten0_.created_by_isilis as created_6_1_1_, contracten0_.created_date
as created_7_1_1_, contracten0_.dsp2_status as dsp8_1_1_, contracten0_.emitter_iban as
emitter_9_1_1_, contracten0_.external_code as externa10_1_1_, contracten0_.is_dsp2 as
is_dsp11_1_1_, contracten0_.new_bank_account_name as new_ban12_1_1_, contracten0_.new_client
as new_cli13_1_1_, contracten0_.new_iban as new_iba14_1_1_, contracten0_.old_iban as
old_iba15_1_1_, contracten0_.owner_name as owner_n16_1_1_, contracten0_.owner_type as
owner_t17_1_1_, contracten0_.partial_mobility as partial18_1_1_, contracten0_.signed_date as
signed_19_1_1_, contracten0_.status as status20_1_1_, contracten0_.target_period as
target_21_1_1_, contracten0_.type as type22_1_1_, items1_.contract_reference as con
</inputbuf>
</process>
<process id="processe007048c8" taskpriority="0" logused="360" waitresource="PAGE: 11:1:14032 "
waittime="4954" ownerId="7994848" transactionname="implicit_transaction"
lasttranstarted="2024-10-10T12:59:44.203" XDES="0xe41498470" lockMode="IX" schedulerid="13"
kpid="1904" status="suspended" spid="73" sbid="0" ecid="0" priority="0" trancount="2"
lastbatchstarted="2024-10-10T12:59:44.220" lastbatchcompleted="2024-10-10T12:59:44.220"
lastattention="1900-01-01T00:00:00.220" clientapp="Microsoft JDBC Driver for SQL Server"
hostname="benoit-XPS" hostpid="422222" loginname="sa" isolationlevel="read committed (2)"
xactid="7994848" currentdb="11" currentdbname="INVOICE" lockTimeout="4294967295"
clientoption1="671088672" clientoption2="128058">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="712" stmtend="1624"
sqlhandle="0x0200000091dc371910eb29b468f5be145b6ba451aec4ee520000000000000000000000000000000000000000">
unknown
</frame>
<frame procname="unknown" line="1"
sqlhandle="0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000">
unknown
</frame>
</executionStack>
<inputbuf>
(@P0 nvarchar(4000),@P1 bit,@P2 nvarchar(4000),@P3 nvarchar(4000),@P4 bit,@P5 date,@P6
nvarchar(4000),@P7 nvarchar(4000),@P8 nvarchar(4000),@P9 bit,@P10 nvarchar(4000),@P11
bit,@P12 nvarchar(4000),@P13 nvarchar(4000),@P14 nvarchar(4000),@P15 nvarchar(4000),@P16
bit,@P17 date,@P18 nvarchar(4000),@P19 nvarchar(4000),@P20 nvarchar(4000),@P21
nvarchar(4000))update contract set agency_reference= @P0 , already_invoiced= @P1 , channel=
@P2 , client_code= @P3 , created_by_isilis= @P4 , created_date= @P5 , dsp2_status= @P6 ,
emitter_iban= @P7 , external_code= @P8 , is_dsp2= @P9 , new_bank_account_name= @P10 ,
new_client= @P11 , new_iban= @P12 , old_iban= @P13 , owner_name= @P14 , owner_type= @P15 ,
partial_mobility= @P16 , signed_date= @P17 , status= @P18 , target_period= @P19 , type= @P20
where reference= @P21
</inputbuf>
</process>
<process id="processe2c099088" taskpriority="0" logused="896"
waitresource="KEY: 11:72057594044284928 (082f6ca0eea6)" waittime="4950" ownerId="7994913"
transactionname="implicit_transaction" lasttranstarted="2024-10-10T12:59:44.220"
XDES="0xe48904470" lockMode="U" schedulerid="10" kpid="960" status="suspended" spid="84"
sbid="0" ecid="0" priority="0" trancount="2" lastbatchstarted="2024-10-10T12:59:44.223"
lastbatchcompleted="2024-10-10T12:59:44.223" lastattention="1900-01-01T00:00:00.223"
clientapp="Microsoft JDBC Driver for SQL Server" hostname="benoit-XPS" hostpid="422222"
loginname="sa" isolationlevel="read committed (2)" xactid="7994913" currentdb="11"
currentdbname="INVOICE" lockTimeout="4294967295" clientoption1="671088672"
clientoption2="128058">
<executionStack>
<frame procname="adhoc" line="1" stmtstart="712" stmtend="1624"
sqlhandle="0x0200000091dc371910eb29b468f5be145b6ba451aec4ee520000000000000000000000000000000000000000">
unknown
</frame>
<frame procname="unknown" line="1"
sqlhandle="0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000">
unknown
</frame>
</executionStack>
<inputbuf>
(@P0 nvarchar(4000),@P1 bit,@P2 nvarchar(4000),@P3 nvarchar(4000),@P4 bit,@P5 date,@P6
nvarchar(4000),@P7 nvarchar(4000),@P8 nvarchar(4000),@P9 bit,@P10 nvarchar(4000),@P11
bit,@P12 nvarchar(4000),@P13 nvarchar(4000),@P14 nvarchar(4000),@P15 nvarchar(4000),@P16
bit,@P17 date,@P18 nvarchar(4000),@P19 nvarchar(4000),@P20 nvarchar(4000),@P21
nvarchar(4000))update contract set agency_reference= @P0 , already_invoiced= @P1 , channel=
@P2 , client_code= @P3 , created_by_isilis= @P4 , created_date= @P5 , dsp2_status= @P6 ,
emitter_iban= @P7 , external_code= @P8 , is_dsp2= @P9 , new_bank_account_name= @P10 ,
new_client= @P11 , new_iban= @P12 , old_iban= @P13 , owner_name= @P14 , owner_type= @P15 ,
partial_mobility= @P16 , signed_date= @P17 , status= @P18 , target_period= @P19 , type= @P20
where reference= @P21
</inputbuf>
</process>
</process-list>
<resource-list>
<pagelock fileid="1" pageid="14032" dbid="11" subresource="FULL" objectname="INVOICE.dbo.contract"
id="locke5cd4b880" mode="IX" associatedObjectId="72057594045267968">
<owner-list>
<owner id="processe2c099088" mode="IX"/>
</owner-list>
<waiter-list>
<waiter id="processe00058ca8" mode="S" requestType="wait"/>
</waiter-list>
</pagelock>
<pagelock fileid="1" pageid="14032" dbid="11" subresource="FULL" objectname="INVOICE.dbo.contract"
id="locke5cd4b880" mode="IX" associatedObjectId="72057594045267968">
<owner-list>
<owner id="processe00058ca8" mode="S" requestType="wait"/>
</owner-list>
<waiter-list>
<waiter id="processe007048c8" mode="IX" requestType="wait"/>
</waiter-list>
</pagelock>
<keylock hobtid="72057594044284928" dbid="11" objectname="INVOICE.dbo.contract"
indexname="PK__contract__FD90DA98DD2927B2" id="locke52d3bd00" mode="X"
associatedObjectId="72057594044284928">
<owner-list>
<owner id="processe007048c8" mode="X"/>
</owner-list>
<waiter-list>
<waiter id="processe2c099088" mode="U" requestType="wait"/>
</waiter-list>
</keylock>
</resource-list>
</deadlock>
</value>
</data>
</event>
死锁报告中出现的查询的执行计划:https://www.brentozar.com/pastetheplan/?id=SkazSLS1kg 和 https://www.brentozar.com/pastetheplan/?id=SJOZvIHJJg
我已在此处上传了包含事务和 SQL 的应用程序日志:https://drive.google.com/file/d/1K4Lt4XwdSr7fgRJi98N82HT8u61X2DJZ/view?usp=drive_link
我正在寻求有关如何改进应用程序以减少死锁量的建议。
正如评论中指出的,此应用程序存在多个问题:
修复第一项是不可能的,因为这意味着要完全重写应用程序。 我通过在实体上实现 Persistable 接口来修复第二项,这稍微改善了情况,但在并行度相对较低(10 个线程)的情况下仍然会发生死锁。
真正解决问题的是删除这两个索引:
-- Definitions of the two dbo.contract indexes that the surrounding text reports dropping.
-- Both keys start with (target_period, client_code); the second appends `type`.
CREATE INDEX idx_contract_period_client
    ON dbo.contract (target_period, client_code)
GO
CREATE INDEX idx_contract_period_client_type
    ON dbo.contract (target_period, client_code, type)
GO
这些索引建立在基数非常低的字段上:target_period 对于几乎所有行都具有相同的值;client_code 的取值虽然多一些,但基数仍然非常低(即:30 000 行具有相同的值)。
如果没有这些索引,我可以将并行度设置为一百个线程,并且不会发生任何死锁。
但是,我仍然不明白,为什么相同的应用程序、相同的数据库架构和相同的数据,在预发布(staging)或生产环境中运行没有问题,在我的笔记本电脑上却会出现死锁。唯一的区别是网络延迟,以及这些环境使用的是完整的 SQL Server,而不是我笔记本电脑上运行在容器中的开发人员版(Developer Edition)。