[Andrei Alexandrescu在talk that he presented at cppcon中提到,当定义并使用__always inline宏时,gcc并不总是内联的,对于__never_inline,反之亦然。我找不到有关发生哪种情况的大量文档,所以有人告诉我吗?
我找不到有关发生哪种情况的太多文档,所以有人告诉我?
您能提供一个代码示例吗?
[将函数作为函数指针传递,并从不同的转换单元(甚至从相同的转换单元,假设优化为-O0或-Os进行调用,并且对大小更佳,或者实际上只是...“幸运”)进行有效调用不允许内联函数:
cat << EOF > main.c
__attribute__((__always_inline__))
static inline
void f(void) {
printf("Hey buddy!\n");
}
extern void call_fp(void (*fp)(void));
int main() {
call_fp(f);
}
EOF
cat << EOF > g.c
void call_fp(void (*fp)(void)) {
printf("Hey pal!");
fp();
}
EOF
编译检查:
$ gcc -g -Ofast main.c g.c && objdump -S ./a.out | grep '<call_fp>:'
00000000000011a0 <call_fp>:
void call_fp(void (*fp)(void)) {
11a0: 53 push %rbx
printf("Hey pal!");
11a1: 31 c0 xor %eax,%eax
void call_fp(void (*fp)(void)) {
11a3: 48 89 fb mov %rdi,%rbx
printf("Hey pal!");
11a6: 48 8d 3d 62 0e 00 00 lea 0xe62(%rip),%rdi # 200f <_IO_stdin_used+0xf>
11ad: e8 8e fe ff ff callq 1040 <printf@plt>
fp();
11b2: 48 89 d8 mov %rbx,%rax
}
11b5: 5b pop %rbx
fp();
################################### vvvv NOT INLINED ######################
11b6: ff e0 jmpq *%rax
11b8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
11bf: 00
您仍然可以使用LTO在翻译部门之间进行优化:
$ gcc -g -flto -Ofast main.c g.c && objdump -S ./a.out | grep '<main>:' -A30
int main() {
1050: 48 83 ec 08 sub $0x8,%rsp
void call_fp(void (*fp)(void)) {
printf("Hey pal!");
1054: 48 8d 3d a9 0f 00 00 lea 0xfa9(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
105b: 31 c0 xor %eax,%eax
105d: e8 de ff ff ff callq 1040 <printf@plt>
printf("Hey buddy!\n");
1062: 48 8d 3d a4 0f 00 00 lea 0xfa4(%rip),%rdi # 200d <_IO_stdin_used+0xd>
####################### Both call_fp() and f() were inlined!!!!!!!!!!!!!
1069: e8 c2 ff ff ff callq 1030 <puts@plt>
call_fp(f);
}
106e: 31 c0 xor %eax,%eax
1070: 48 83 c4 08 add $0x8,%rsp
1074: c3 retq
1075: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
107c: 00 00 00
107f: 90 nop