我尝试在用户模式下使用QEMU来模拟ARMv9 SME程序,但在执行过程中遇到以下错误:
qemu: uncaught target signal 4 (Illegal instruction) - core dumped
QEMU版本:qemu-9.0.2
目标架构:ARMv9
// matmul_opt: matrix-multiply kernel that accumulates outer products
// (fmopa, .s elements) into SME ZA tiles.
//
// This listing is an excerpt: the spans marked "... (rest of ...)" are
// elided, so registers that are only written or read inside them are not
// described here.
//
// Arguments (see the comment on the label line):
//   x0 = M, x1 = K, x2 = N, x3 = matLeft, x4 = matRight, x5 = matResult
// Stack:  48 bytes, holding x19-x24; restored before ret.
// Mode:   all SME code is bracketed by smstart ... smstop.
//
// NOTE(review): x18 is written below. It is a platform-reserved register on
//   some ABIs; confirm the target platform allows using it as scratch.
// NOTE(review): "ptrue pn10.b" and the "whilelt pnN, ..., vlx2" forms are
//   predicate-as-counter instructions (FEAT_SVE2p1 / SME2). They trap as
//   illegal instructions on CPUs or emulators that only implement
//   FEAT_SME/FEAT_SVE/FEAT_SVE2. The pext and pn-predicated multi-vector
//   ld1w below presumably fall in the same group; TODO confirm.
matmul_opt: // x0: M, x1: K, x2: N, x3: matLeft, x4: matRight, x5: matResult
// Prologue: reserve 48 bytes and save x19-x24.
// Only x22 and x23 are referenced in the visible code; the others are
// presumably used in the elided sections.
stp x19, x20, [sp, #-48]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
smstart
// constants
cntw x6 // SVLs
mul x22, x6, x1 // SVLs*K
mul x23, x6, x2 // SVLs*N
add x18, x23, x2 // SVLs*N + N
add x11, x4, x2, lsl #2 // Exit condition for N loop
// x12 is the start index used by the M-dimension whilelt below.
mov x12, #0
// x6 is reused from here on: the SVLs value above is dead once x22/x23 exist.
cntb x6 // SVLb
// x14 is not referenced in the visible code (presumably used in an elided part).
mov x14, #0
ptrue pn10.b // Predicate for SME2 VLx2 (a_ptr loads)
whilelt pn8.s, x12, x0, vlx2 // tiles predicate (M dimension)
// w6 becomes the bound compared against w13 at the end of .Loop_store_ZA.
sub w6, w6, #8 // SVLb-8
.Loop_M:
// Extract tile 0/1 and tile 2/3 predicates (M) from vlx2 predicate.
pext { p2.s, p3.s }, pn8[0]
mov x16, x4 // b_base
mov x9, x5 // c_base
// The N predicate is built from byte addresses: b_base (x16) against the
// end address in x11.
whilelt pn9.b, x16, x11, vlx2 // tiles predicate (N dimension)
.Loop_N:
mov x7, x3 // a_ptr = a_base
mov x17, x16 // b_ptr = b_base
mov x10, x9 // c_ptr0 = c_base
// Extract tile 0/2 and tile 1/3 predicates (N) from vlx2 predicate.
pext { p0.b, p1.b }, pn9[0]
add x8, x3, x22, lsl #2 // a_base + SVLs*K FP32 elms (bytes)
addvl x15, x8, #-1 // Exit condition for K loop
// Prime the K loop: load the first operands and issue the first outer
// product before entering .Loop_K.
ld1w {z1.s}, p2/z, [x7] // Load 1st vector from a_ptr
zero {za}
ld1w {z2.s-z3.s}, pn9/z, [x17] // Load 2 vectors from b_ptr
fmopa za0.s, p2/m, p0/m, z1.s, z2.s // ZA0+=1st a_ptr vec OP 1st b_ptr vec
ld1w {z5.s}, p3/z, [x7, x22, lsl #2] // Load 2nd vector from a_ptr
addvl x7, x7, #1 // a_ptr += SVLb (bytes)
.Loop_K:
fmopa za2.s, p3/m, p0/m, z5.s, z2.s // ZA2+=2nd a_ptr vec OP 1st b_ptr vec
// ... (rest of the loop body)
// Continue while a_ptr (x7) is below the K-loop limit (x15).
// NOTE(review): b.mi tests the sign of x7 - x15, not unsigned order;
//   b.lo would be the unsigned address comparison. Confirm this is intended.
cmp x7, x15
b.mi .Loop_K
// ... (rest of the N loop body)
.Loop_store_ZA:
// ... (rest of the store loop body)
// w13 is not set anywhere in the visible code; presumably it is the store
// loop counter maintained in the elided body. Loop bound is w6 (SVLb-8).
cmp w13, w6
b.mi .Loop_store_ZA
// ... (rest of the M loop body)
// The back-branches to .Loop_N and .Loop_M are not visible in this excerpt.
smstop
// Epilogue: restore x19-x24 in reverse order and release the 48-byte frame.
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp], #48
ret
aarch64-none-elf-as -march=armv9.2-a -mcpu=cortex-a710+sme2 -o test_sme test_sme.S
qemu-aarch64 test_sme
在执行上述程序时,我收到“非法指令”错误。这可能是什么原因?我的 QEMU 版本支持我正在使用的 SME2 指令集吗?我该如何解决这个问题?
任何帮助将不胜感激!
指令“ptrue pn10.b”属于 FEAT_SVE2p1 扩展,而 QEMU 尚未实现该扩展;类似地,您使用的“whilelt”指令形式(以 pn 谓词计数器为目标并带 vlx2)同样属于 FEAT_SVE2p1。因此,您会收到“非法指令”错误——这与在未实现 FEAT_SVE2p1 的真实 CPU 上运行该二进制文件时的结果相同。如果您想在 QEMU 下运行代码,目前应只使用 FEAT_SME/FEAT_SVE/FEAT_SVE2 中的指令。(FEAT_SVE2p1 和 FEAT_SME2 已在待办事项列表中,但可能还需要一段时间才能实现。)
此处列出了当前模拟的一组架构功能: https://www.qemu.org/docs/master/system/arm/emulation.html