这个问题涉及 Swift 2.2 中以协议扩展形式编写的函数的执行效率。有谁知道让它运行得更快的方法吗?
假设我有一个名为 Number 的协议,并且 Int 遵循该协议:
/// A minimal numeric abstraction: equatable, constructible from an integer
/// literal, and supporting the four basic arithmetic operators on `Self`.
protocol Number: Equatable, IntegerLiteralConvertible {
    // MARK: Construction

    /// Creates a value from an `Int`.
    init(_ int: Int)

    /// Creates a value from another value of the same conforming type.
    init(_ number: Self)

    // MARK: Arithmetic (Swift 2.2-style operator requirements)

    func +(lhs: Self, rhs: Self) -> Self
    func -(lhs: Self, rhs: Self) -> Self
    func *(lhs: Self, rhs: Self) -> Self
    func /(lhs: Self, rhs: Self) -> Self
}

/// The conformance body is empty: `Int` is expected to supply every
/// requirement of `Number` on its own.
extension Int: Number {}
现在我想写一个阶乘函数作为 Number 的扩展
extension Number {
    /// Returns the factorial of `self`, computed recursively using only the
    /// operations that `Number` requires (`==`, `-`, `*` and integer literals).
    ///
    /// The recursion terminates only when the value compares equal to `0`.
    func factorialNumber() -> Self {
        guard self != 0 else { return 1 }
        let predecessor = self - 1
        return self * predecessor.factorialNumber()
    }
}
我也写了同样的函数作为 Int 的扩展
extension Int {
    /// Returns the factorial of `self`, computed recursively on concrete `Int`
    /// values.
    ///
    /// The recursion terminates only when the value reaches `0`.
    func factorialInt() -> Int {
        guard self != 0 else { return 1 }
        let predecessor = self - 1
        return self * predecessor.factorialInt()
    }
}
当我测量每个函数的运行时间时,发现了巨大的差异。
(原帖此处附有测量结果的截图;测量时已启用全模块优化,即 Whole Module Optimization。)
我猜想泛型在运行时会产生一些开销。有没有更好的方法?与其尝试编写一个协议函数,不如分别为 Int、Double、Float 各写一份相同的扩展函数,这样做是否更合理?
谢谢
如果查看这两个函数的反汇编结果,就能看到性能损失的原因:factorialInt 的指令比 factorialNumber 少得多。
下面是使用 Swift 3 构建并启用 Whole Module Optimization 后这两个方法的反汇编结果(先是 factorialInt,然后是 factorialNumber;性能测量的输出与问题中 Swift 2 的结果类似):
; Disassembly of Int.factorialInt() (symbol __TFE8TestFMWKSi12factorialIntfT_Si, per the XREFs and the recursive call below).
; The input value arrives in rdi and the result is returned in rax.
0000000000001e80 push rbp ; XREF=__TFE8TestFMWKSi12factorialIntfT_Si+27, __TFE8TestFMWKSi19factorialIntWithMulfT_Si+22
0000000000001e81 mov rbp, rsp
0000000000001e84 push rbx
0000000000001e85 push rax
0000000000001e86 mov rbx, rdi
; Preload the result 1, then return it straight away if the input is zero.
0000000000001e89 mov eax, 0x1
0000000000001e8e test rbx, rbx
0000000000001e91 je 0x1ea9
; Compute input - 1; on signed overflow jump to the ud2 trap at 0x1eb0.
0000000000001e93 mov rdi, rbx
0000000000001e96 dec rdi
0000000000001e99 jo 0x1eb0
; Direct (statically bound) recursive call.
0000000000001e9b call __TFE8TestFMWKSi12factorialIntfT_Si
; Multiply the input by the recursive result; on signed overflow jump to the ud2 trap at 0x1eb2.
0000000000001ea0 imul rbx, rax
0000000000001ea4 mov rax, rbx
0000000000001ea7 jo 0x1eb2
; Epilogue shared by the zero case and the recursive case.
0000000000001ea9 add rsp, 0x8 ; XREF=__TFE8TestFMWKSi12factorialIntfT_Si+17
0000000000001ead pop rbx
0000000000001eae pop rbp
0000000000001eaf ret
; Overflow traps (invalid-instruction faults) for the decrement and the multiply.
0000000000001eb0 ud2 ; XREF=__TFE8TestFMWKSi12factorialIntfT_Si+25
0000000000001eb2 ud2 ; XREF=__TFE8TestFMWKSi12factorialIntfT_Si+39
; endp
0000000000001eb4 nop word [cs:rax+rax]
; Disassembly of the protocol-extension method Number.factorialNumber()
; (symbol __TFE8TestFMWKPS_6Number15factorialNumberfT_x, per the XREFs and the recursive call at 0x1de0).
; Apart from that one recursive call, every call in this function is indirect (through a register or a stack slot).
0000000000001770 push rbp ; XREF=__TFE8TestFMWKPS_6Number15factorialNumberfT_x+1648
0000000000001771 mov rbp, rsp
0000000000001774 push r15
0000000000001776 push r14
0000000000001778 push r13
000000000000177a push r12
000000000000177c push rbx
000000000000177d sub rsp, 0x208
; Save the four incoming arguments (rdi, rsi, rdx, rcx) and load function pointers from tables reached through rdx and [rsi-8].
; NOTE(review): presumably these are the protocol witness table and the type's value-witness table — not confirmable from the listing alone.
0000000000001784 mov qword [ss:rbp+var_128], rcx
000000000000178b mov rbx, rdx
000000000000178e mov qword [ss:rbp+var_F8], rbx
0000000000001795 mov r14, rsi
0000000000001798 mov qword [ss:rbp+var_C8], rdi
000000000000179f mov rax, qword [ds:rbx]
00000000000017a2 mov qword [ss:rbp+var_110], rax
00000000000017a9 mov rdx, qword [ds:rax]
00000000000017ac mov qword [ss:rbp+var_E8], rdx
00000000000017b3 mov rax, qword [ds:r14-8]
00000000000017b7 mov qword [ss:rbp+var_C0], rax
00000000000017be mov rax, qword [ds:rax+0x28]
00000000000017c2 mov qword [ss:rbp+var_130], rax
00000000000017c9 lea rdi, qword [ss:rbp+var_40]
00000000000017cd mov rsi, rcx
00000000000017d0 mov rdx, r14
00000000000017d3 call rax
00000000000017d5 mov qword [ss:rbp+var_118], rax
00000000000017dc mov r15, qword [ds:rbx+8]
00000000000017e0 mov qword [ss:rbp+var_E0], r15
00000000000017e7 mov rax, qword [ds:r15+0x10]
00000000000017eb mov qword [ss:rbp+var_D0], rax
00000000000017f2 mov rdi, r14
00000000000017f5 mov rsi, r15
00000000000017f8 call qword [ds:r15]
00000000000017fb mov r13, rax
00000000000017fe mov qword [ss:rbp+var_D8], r13
0000000000001805 mov rdi, r13
0000000000001808 mov rsi, r14
000000000000180b mov rdx, r15
000000000000180e call qword [ds:r15+8]
0000000000001812 mov rbx, rax
0000000000001815 mov qword [ss:rbp+var_100], rbx
000000000000181c mov r12, qword [ds:rbx]
000000000000181f mov qword [ss:rbp+var_F0], r12
0000000000001826 mov rax, qword [ds:r13-8]
000000000000182a mov qword [ss:rbp+var_120], rax
0000000000001831 mov rax, qword [ds:rax+0x58]
0000000000001835 mov qword [ss:rbp+var_108], rax
000000000000183c lea rdi, qword [ss:rbp+var_58]
0000000000001840 mov rsi, r13
0000000000001843 call rax
; Zero-fill a run of stack argument slots, then make an indirect call with esi = 0.
; NOTE(review): presumably this materializes the integer literal 0 used in `self == 0` — confirm.
0000000000001845 mov qword [ss:rsp+0x230+var_148], rbx
000000000000184d mov qword [ss:rsp+0x230+var_150], r13
0000000000001855 mov qword [ss:rsp+0x230+var_158], r13
000000000000185d mov qword [ss:rsp+0x230+var_160], 0x0
0000000000001869 mov qword [ss:rsp+0x230+var_168], 0x0
0000000000001875 mov qword [ss:rsp+0x230+var_170], 0x0
0000000000001881 mov qword [ss:rsp+0x230+var_178], 0x0
000000000000188d mov qword [ss:rsp+0x230+var_180], 0x0
0000000000001899 mov qword [ss:rsp+0x230+var_188], 0x0
00000000000018a5 mov qword [ss:rsp+0x230+var_190], 0x0
00000000000018b1 mov qword [ss:rsp+0x230+var_198], 0x0
00000000000018bd mov qword [ss:rsp+0x230+var_1A0], 0x0
00000000000018c9 mov qword [ss:rsp+0x230+var_1A8], 0x0
00000000000018d5 mov qword [ss:rsp+0x230+var_1B0], 0x0
00000000000018e1 mov qword [ss:rsp+0x230+var_1B8], 0x0
00000000000018ea mov qword [ss:rsp+0x230+var_1C0], 0x0
00000000000018f3 mov qword [ss:rsp+0x230+var_1C8], 0x0
00000000000018fc mov qword [ss:rsp+0x230+var_1D0], 0x0
0000000000001905 mov qword [ss:rsp+0x230+var_1D8], 0x0
000000000000190e mov qword [ss:rsp+0x230+var_1E0], 0x0
0000000000001917 mov qword [ss:rsp+0x230+var_1E8], 0x0
0000000000001920 mov qword [ss:rsp+0x230+var_1F0], 0x0
0000000000001929 mov qword [ss:rsp+0x230+var_1F8], 0x0
0000000000001932 mov qword [ss:rsp+0x230+var_200], 0x0
000000000000193b mov qword [ss:rsp+0x230+var_208], 0x0
0000000000001944 mov qword [ss:rsp+0x230+var_210], 0x0
000000000000194d mov qword [ss:rsp+0x230+var_218], 0x0
0000000000001956 mov qword [ss:rsp+0x230+var_220], 0x0
000000000000195f mov qword [ss:rsp+0x230+var_228], 0x0
0000000000001968 mov qword [ss:rsp+0x230+var_230], 0x0
0000000000001970 xor esi, esi
0000000000001972 xor edx, edx
0000000000001974 xor ecx, ecx
0000000000001976 xor r8d, r8d
0000000000001979 xor r9d, r9d
000000000000197c mov rbx, rax
000000000000197f mov rdi, rbx
0000000000001982 call r12
0000000000001985 mov rax, qword [ss:rbp+var_C0]
000000000000198c mov rax, qword [ds:rax+0x58]
0000000000001990 mov qword [ss:rbp+var_138], rax
0000000000001997 lea rdi, qword [ss:rbp+var_70]
000000000000199b mov rsi, r14
000000000000199e call rax
00000000000019a0 mov r12, rax
00000000000019a3 mov rdi, r12
00000000000019a6 mov rsi, rbx
00000000000019a9 mov rdx, r14
00000000000019ac mov rcx, r14
00000000000019af mov r8, r15
00000000000019b2 mov rbx, r13
00000000000019b5 call qword [ss:rbp+var_D0]
; Indirect call whose low result byte (al) is kept in r12b and tested at 0x1a15.
; NOTE(review): presumably the `==` comparison against 0 — confirm.
00000000000019bb mov rdi, qword [ss:rbp+var_118]
00000000000019c2 mov rsi, r12
00000000000019c5 mov rdx, r14
00000000000019c8 mov rcx, r14
00000000000019cb mov r8, qword [ss:rbp+var_110]
00000000000019d2 call qword [ss:rbp+var_E8]
00000000000019d8 mov r12b, al
00000000000019db mov rax, qword [ss:rbp+var_C0]
00000000000019e2 mov r13, qword [ds:rax+0x18]
00000000000019e6 lea rdi, qword [ss:rbp+var_70]
00000000000019ea mov rsi, r14
00000000000019ed call r13
00000000000019f0 mov rax, qword [ss:rbp+var_120]
00000000000019f7 mov rax, qword [ds:rax+0x18]
00000000000019fb mov qword [ss:rbp+var_E8], rax
0000000000001a02 lea rdi, qword [ss:rbp+var_58]
0000000000001a06 mov rsi, rbx
0000000000001a09 call rax
0000000000001a0b lea rdi, qword [ss:rbp+var_40]
0000000000001a0f mov rsi, r14
0000000000001a12 call r13
; Branch on the saved comparison result: bit clear -> 0x1bb1 (recursive path); bit set -> fall through.
0000000000001a15 test r12b, 0x1
0000000000001a19 je 0x1bb1
; Fall-through path: same zero-fill pattern, this time with esi = 1, then jump to the common epilogue.
; NOTE(review): presumably builds the literal 1 for `return 1` — confirm.
0000000000001a1f lea r15, qword [ss:rbp+var_40]
0000000000001a23 mov rdi, r15
0000000000001a26 mov rbx, qword [ss:rbp+var_D8]
0000000000001a2d mov rsi, rbx
0000000000001a30 call qword [ss:rbp+var_108]
0000000000001a36 mov r13, rax
0000000000001a39 mov rax, qword [ss:rbp+var_100]
0000000000001a40 mov qword [ss:rsp+0x230+var_148], rax
0000000000001a48 mov qword [ss:rsp+0x230+var_150], rbx
0000000000001a50 mov qword [ss:rsp+0x230+var_158], rbx
0000000000001a58 mov qword [ss:rsp+0x230+var_160], 0x0
0000000000001a64 mov qword [ss:rsp+0x230+var_168], 0x0
0000000000001a70 mov qword [ss:rsp+0x230+var_170], 0x0
0000000000001a7c mov qword [ss:rsp+0x230+var_178], 0x0
0000000000001a88 mov qword [ss:rsp+0x230+var_180], 0x0
0000000000001a94 mov qword [ss:rsp+0x230+var_188], 0x0
0000000000001aa0 mov qword [ss:rsp+0x230+var_190], 0x0
0000000000001aac mov qword [ss:rsp+0x230+var_198], 0x0
0000000000001ab8 mov qword [ss:rsp+0x230+var_1A0], 0x0
0000000000001ac4 mov qword [ss:rsp+0x230+var_1A8], 0x0
0000000000001ad0 mov qword [ss:rsp+0x230+var_1B0], 0x0
0000000000001adc mov qword [ss:rsp+0x230+var_1B8], 0x0
0000000000001ae5 mov qword [ss:rsp+0x230+var_1C0], 0x0
0000000000001aee mov qword [ss:rsp+0x230+var_1C8], 0x0
0000000000001af7 mov qword [ss:rsp+0x230+var_1D0], 0x0
0000000000001b00 mov qword [ss:rsp+0x230+var_1D8], 0x0
0000000000001b09 mov qword [ss:rsp+0x230+var_1E0], 0x0
0000000000001b12 mov qword [ss:rsp+0x230+var_1E8], 0x0
0000000000001b1b mov qword [ss:rsp+0x230+var_1F0], 0x0
0000000000001b24 mov qword [ss:rsp+0x230+var_1F8], 0x0
0000000000001b2d mov qword [ss:rsp+0x230+var_200], 0x0
0000000000001b36 mov qword [ss:rsp+0x230+var_208], 0x0
0000000000001b3f mov qword [ss:rsp+0x230+var_210], 0x0
0000000000001b48 mov qword [ss:rsp+0x230+var_218], 0x0
0000000000001b51 mov qword [ss:rsp+0x230+var_220], 0x0
0000000000001b5a mov qword [ss:rsp+0x230+var_228], 0x0
0000000000001b63 mov qword [ss:rsp+0x230+var_230], 0x0
0000000000001b6b mov esi, 0x1
0000000000001b70 xor edx, edx
0000000000001b72 xor ecx, ecx
0000000000001b74 xor r8d, r8d
0000000000001b77 xor r9d, r9d
0000000000001b7a mov rdi, r13
0000000000001b7d call qword [ss:rbp+var_F0]
0000000000001b83 mov rdi, qword [ss:rbp+var_C8]
0000000000001b8a mov rsi, r13
0000000000001b8d mov rdx, r14
0000000000001b90 mov rcx, r14
0000000000001b93 mov r8, qword [ss:rbp+var_E0]
0000000000001b9a call qword [ss:rbp+var_D0]
0000000000001ba0 mov rdi, r15
0000000000001ba3 mov rsi, rbx
0000000000001ba6 call qword [ss:rbp+var_E8]
0000000000001bac jmp 0x1e5a
; Recursive path: loads two more function pointers from the table at var_F8 (+0x28, +0x30),
; repeats the zero-fill pattern with esi = 1, and performs further indirect calls before recursing.
; NOTE(review): presumably these correspond to `*`, `-` and the literal 1 in `self * (self - 1)` — confirm.
0000000000001bb1 mov rbx, qword [ss:rbp+var_F8] ; XREF=__TFE8TestFMWKPS_6Number15factorialNumberfT_x+681
0000000000001bb8 mov rax, qword [ds:rbx+0x28]
0000000000001bbc mov qword [ss:rbp+var_110], rax
0000000000001bc3 lea rdi, qword [ss:rbp+var_40]
0000000000001bc7 mov r12, qword [ss:rbp+var_128]
0000000000001bce mov rsi, r12
0000000000001bd1 mov rdx, r14
0000000000001bd4 mov r15, qword [ss:rbp+var_130]
0000000000001bdb call r15
0000000000001bde mov qword [ss:rbp+var_118], rax
0000000000001be5 mov rax, qword [ds:rbx+0x30]
0000000000001be9 mov qword [ss:rbp+var_120], rax
0000000000001bf0 lea rdi, qword [ss:rbp+var_58]
0000000000001bf4 mov rsi, r12
0000000000001bf7 mov rdx, r14
0000000000001bfa call r15
0000000000001bfd mov r15, rax
0000000000001c00 lea rdi, qword [ss:rbp+var_70]
0000000000001c04 mov rbx, qword [ss:rbp+var_D8]
0000000000001c0b mov rsi, rbx
0000000000001c0e call qword [ss:rbp+var_108]
0000000000001c14 mov qword [ss:rbp+var_108], r13
0000000000001c1b mov r13, rax
0000000000001c1e mov rax, qword [ss:rbp+var_100]
0000000000001c25 mov qword [ss:rsp+0x230+var_148], rax
0000000000001c2d mov qword [ss:rsp+0x230+var_150], rbx
0000000000001c35 mov qword [ss:rsp+0x230+var_158], rbx
0000000000001c3d mov qword [ss:rsp+0x230+var_160], 0x0
0000000000001c49 mov qword [ss:rsp+0x230+var_168], 0x0
0000000000001c55 mov qword [ss:rsp+0x230+var_170], 0x0
0000000000001c61 mov qword [ss:rsp+0x230+var_178], 0x0
0000000000001c6d mov qword [ss:rsp+0x230+var_180], 0x0
0000000000001c79 mov qword [ss:rsp+0x230+var_188], 0x0
0000000000001c85 mov qword [ss:rsp+0x230+var_190], 0x0
0000000000001c91 mov qword [ss:rsp+0x230+var_198], 0x0
0000000000001c9d mov qword [ss:rsp+0x230+var_1A0], 0x0
0000000000001ca9 mov qword [ss:rsp+0x230+var_1A8], 0x0
0000000000001cb5 mov qword [ss:rsp+0x230+var_1B0], 0x0
0000000000001cc1 mov qword [ss:rsp+0x230+var_1B8], 0x0
0000000000001cca mov qword [ss:rsp+0x230+var_1C0], 0x0
0000000000001cd3 mov qword [ss:rsp+0x230+var_1C8], 0x0
0000000000001cdc mov qword [ss:rsp+0x230+var_1D0], 0x0
0000000000001ce5 mov qword [ss:rsp+0x230+var_1D8], 0x0
0000000000001cee mov qword [ss:rsp+0x230+var_1E0], 0x0
0000000000001cf7 mov qword [ss:rsp+0x230+var_1E8], 0x0
0000000000001d00 mov qword [ss:rsp+0x230+var_1F0], 0x0
0000000000001d09 mov qword [ss:rsp+0x230+var_1F8], 0x0
0000000000001d12 mov qword [ss:rsp+0x230+var_200], 0x0
0000000000001d1b mov qword [ss:rsp+0x230+var_208], 0x0
0000000000001d24 mov qword [ss:rsp+0x230+var_210], 0x0
0000000000001d2d mov qword [ss:rsp+0x230+var_218], 0x0
0000000000001d36 mov qword [ss:rsp+0x230+var_220], 0x0
0000000000001d3f mov qword [ss:rsp+0x230+var_228], 0x0
0000000000001d48 mov qword [ss:rsp+0x230+var_230], 0x0
0000000000001d50 mov esi, 0x1
0000000000001d55 xor edx, edx
0000000000001d57 xor ecx, ecx
0000000000001d59 xor r8d, r8d
0000000000001d5c xor r9d, r9d
0000000000001d5f mov rdi, r13
0000000000001d62 call qword [ss:rbp+var_F0]
0000000000001d68 lea rdi, qword [ss:rbp+var_88]
0000000000001d6f mov rsi, r14
0000000000001d72 mov rbx, qword [ss:rbp+var_138]
0000000000001d79 call rbx
0000000000001d7b mov r12, rax
0000000000001d7e mov rdi, r12
0000000000001d81 mov rsi, r13
0000000000001d84 mov rdx, r14
0000000000001d87 mov rcx, r14
0000000000001d8a mov r8, qword [ss:rbp+var_E0]
0000000000001d91 call qword [ss:rbp+var_D0]
0000000000001d97 lea rdi, qword [ss:rbp+var_A0]
0000000000001d9e mov rsi, r14
0000000000001da1 call rbx
0000000000001da3 mov r13, rax
0000000000001da6 mov rdi, r13
0000000000001da9 mov rsi, r15
0000000000001dac mov rdx, r12
0000000000001daf mov rcx, r14
0000000000001db2 mov r8, r14
0000000000001db5 mov r15, qword [ss:rbp+var_F8]
0000000000001dbc mov r9, r15
0000000000001dbf call qword [ss:rbp+var_120]
0000000000001dc5 lea rdi, qword [ss:rbp+var_B8]
0000000000001dcc mov rsi, r14
0000000000001dcf call rbx
0000000000001dd1 mov r12, rax
; The only direct call in this function: the recursive invocation, passing four arguments (rdi, rsi, rdx, rcx).
0000000000001dd4 mov rdi, r12 ; argument #1 for method __TFE8TestFMWKPS_6Number15factorialNumberfT_x
0000000000001dd7 mov rsi, r14 ; argument #2 for method __TFE8TestFMWKPS_6Number15factorialNumberfT_x
0000000000001dda mov rdx, r15 ; argument #3 for method __TFE8TestFMWKPS_6Number15factorialNumberfT_x
0000000000001ddd mov rcx, r13 ; argument #4 for method __TFE8TestFMWKPS_6Number15factorialNumberfT_x
0000000000001de0 call __TFE8TestFMWKPS_6Number15factorialNumberfT_x
; Indirect call through the pointer loaded from table offset +0x28, writing via the originally passed rdi (var_C8),
; followed by a series of indirect calls on the stack temporaries.
; NOTE(review): presumably the final multiply and then destruction of temporaries — confirm.
0000000000001de5 mov rdi, qword [ss:rbp+var_C8]
0000000000001dec mov rsi, qword [ss:rbp+var_118]
0000000000001df3 mov rdx, r12
0000000000001df6 mov rcx, r14
0000000000001df9 mov r8, r14
0000000000001dfc mov r9, r15
0000000000001dff call qword [ss:rbp+var_110]
0000000000001e05 lea rdi, qword [ss:rbp+var_B8]
0000000000001e0c mov rsi, r14
0000000000001e0f mov rbx, qword [ss:rbp+var_108]
0000000000001e16 call rbx
0000000000001e18 lea rdi, qword [ss:rbp+var_A0]
0000000000001e1f mov rsi, r14
0000000000001e22 mov rax, qword [ss:rbp+var_C0]
0000000000001e29 call qword [ds:rax]
0000000000001e2b lea rdi, qword [ss:rbp+var_88]
0000000000001e32 mov rsi, r14
0000000000001e35 call rbx
0000000000001e37 lea rdi, qword [ss:rbp+var_70]
0000000000001e3b mov rsi, qword [ss:rbp+var_D8]
0000000000001e42 call qword [ss:rbp+var_E8]
0000000000001e48 lea rdi, qword [ss:rbp+var_58]
0000000000001e4c mov rsi, r14
0000000000001e4f call rbx
0000000000001e51 lea rdi, qword [ss:rbp+var_40]
0000000000001e55 mov rsi, r14
0000000000001e58 call rbx
; Common epilogue: rax is set to the value saved from rdi at entry (var_C8), then registers are restored.
0000000000001e5a mov rax, qword [ss:rbp+var_C8] ; XREF=__TFE8TestFMWKPS_6Number15factorialNumberfT_x+1084
0000000000001e61 add rsp, 0x208
0000000000001e68 pop rbx
0000000000001e69 pop r12
0000000000001e6b pop r13
0000000000001e6d pop r14
0000000000001e6f pop r15
0000000000001e71 pop rbp
0000000000001e72 ret
; endp
0000000000001e73 nop word [cs:rax+rax]
通过协议进行调度是有成本的 - 方法不再静态调度,必须在接收者所属数据类型的调度表中查找。
事实证明,此查找需要完成一些指令,从而导致您注意到的性能差异。
这个回答或许值得更新一下:截至 Swift 5.10,这似乎已不再是问题。我运行了完全相同的代码(不过是通过 swift run -c release 直接在可执行文件中运行,而不是通过测试),两种写法得到了相同的结果(在我的机器上约为 0.0145 秒)。