我的目标是将 Rust 程序编译为尽可能小的二进制文件并提取机器代码。我做了一个非常简单的程序来测试。
.cargo/config
[target.x86_64-pc-windows-gnu]
rustflags = ["-C", "link-args=-e _start -static -nostartfiles"]
Cargo.toml
[package]
name = "r"
version = "0.1.0"
edition = "2021"
[profile.release]
panic = "abort"
opt-level = "z"
lto = true
codegen-units = 1
main.rs
// Build without the standard library; the only library path referenced below is `core`.
#![no_std]
// Do not use the usual `main` entry point; the entry symbol is `_start`, defined below.
#![no_main]
/// Panic handler for this `no_std` program.
///
/// Ignores the panic information and never returns: it spins in an empty loop.
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! {
loop {}
}
/// Program entry point; returns the constant 42.
///
/// `#[no_mangle]` keeps the symbol name `_start` unmangled so that the linker
/// flag `-e _start` from the cargo config can select it as the entry symbol.
/// In the disassembly shown in this document the body is `mov eax, 0x2a; ret`.
///
/// # Safety
///
/// Declared `unsafe`, but the body performs no unsafe operation.
/// NOTE(review): no explicit `extern` ABI is declared here; confirm that the
/// default Rust ABI is acceptable for a function used as the image entry point.
#[no_mangle]
unsafe fn _start() -> isize {
42
}
我使用以下命令编译:
cargo build --target x86_64-pc-windows-gnu --release
然后使用以下命令提取 .text 节:
objcopy -j .text -O binary target/x86_64-pc-windows-gnu/release/r.exe r.bin
但是当我反汇编查看机器代码时,得到的内容比我预期的要多:
% objdump -D -b binary -mi386 -Mx86-64 -Mintel -z r.bin
r.bin: file format binary
Disassembly of section .data:
00000000 <.data>:
0: b8 2a 00 00 00 mov eax,0x2a
5: c3 ret
6: 66 90 xchg ax,ax
8: ff (bad)
9: ff (bad)
a: ff (bad)
b: ff (bad)
c: ff (bad)
d: ff (bad)
e: ff (bad)
f: ff 00 inc DWORD PTR [rax]
11: 00 00 add BYTE PTR [rax],al
13: 00 00 add BYTE PTR [rax],al
15: 00 00 add BYTE PTR [rax],al
17: 00 ff add bh,bh
19: ff (bad)
1a: ff (bad)
1b: ff (bad)
1c: ff (bad)
1d: ff (bad)
1e: ff (bad)
1f: ff 00 inc DWORD PTR [rax]
21: 00 00 add BYTE PTR [rax],al
23: 00 00 add BYTE PTR [rax],al
25: 00 00 add BYTE PTR [rax],al
27: 00 .byte 0x0
而我预期的结果是:
% objdump -D -b binary -mi386 -Mx86-64 -Mintel -z r.bin
r.bin: file format binary
Disassembly of section .data:
00000000 <.data>:
0: b8 2a 00 00 00 mov eax,0x2a
5: c3 ret
两个问题:
您提到的这些额外“指令”并不是真正的指令;查看指向它们的符号,就能找到一些关于它们可能是什么的线索,例如执行
objdump --all -S target/x86_64-pc-windows-gnu/release/r.exe
:
0000000140001000 <_start>:
140001000: b8 2a 00 00 00 mov $0x2a,%eax
140001005: c3 ret
140001006: 66 90 xchg %ax,%ax
0000000140001008 <__CTOR_LIST__>:
140001008: ff (bad)
140001009: ff (bad)
14000100a: ff (bad)
14000100b: ff (bad)
14000100c: ff (bad)
14000100d: ff (bad)
14000100e: ff (bad)
14000100f: ff 00 incl (%rax)
140001011: 00 00 add %al,(%rax)
140001013: 00 00 add %al,(%rax)
140001015: 00 00 add %al,(%rax)
...
0000000140001018 <__DTOR_LIST__>:
140001018: ff (bad)
140001019: ff (bad)
14000101a: ff (bad)
14000101b: ff (bad)
14000101c: ff (bad)
14000101d: ff (bad)
14000101e: ff (bad)
14000101f: ff 00 incl (%rax)
140001021: 00 00 add %al,(%rax)
140001023: 00 00 add %al,(%rax)
140001025: 00 00 add %al,(%rax)
...
这些附加符号
__CTOR_LIST__
和 __DTOR_LIST__
与一种称为“全局构造函数”的功能有关。此功能(主要在 C++ 中使用)允许您使用 __attribute__((constructor))
和 __attribute__((destructor))
来标注函数;带有这些属性的函数会分别在程序开始运行时和结束运行时被调用。
在 MinGW 中,这项功能恰好是通过把这些函数的地址追加到
.text
节的末尾来实现的(这种实现方式不仅与面向 Linux 的 GCC 不同,也与面向 MinGW 的 LLVM LLD 不同)——有关实现的更多细节,请参阅此答案。mingw-w64 运行时只是在 __main
函数中依次调用这些构造函数:
for (i = nptrs; i >= 1; i--)
{
__CTOR_LIST__[i] ();
}
为了去掉这些额外的数据,您可以为
ld
使用自定义链接器脚本。默认脚本名为 i386pep.x
,可以在 MinGW 安装路径中找到(通常为 /usr/x86_64-w64-mingw32/lib/ldscripts/i386pep.x
或类似路径)[1]。您可以将此文件复制到其他位置;打开后,应该能在其中看到以下内容:
.text __image_base__ + ( __section_alignment__ < 0x1000 ? . : __section_alignment__ ) :
{
KEEP (*(SORT_NONE(.init)))
*(.text)
*(SORT(.text$*))
*(.text.*)
*(.gnu.linkonce.t.*)
*(.glue_7t)
*(.glue_7)
. = ALIGN(8);
/* Note: we always define __CTOR_LIST__ and ___CTOR_LIST__ here,
we do not PROVIDE them. This is because the ctors.o startup
code in libgcc defines them as common symbols, with the
expectation that they will be overridden by the definitions
here. If we PROVIDE the symbols then they will not be
overridden and global constructors will not be run.
See PR 22762 for more details.
This does mean that it is not possible for a user to define
their own __CTOR_LIST__ and __DTOR_LIST__ symbols; if they do,
the content from those variables are included but the symbols
defined here silently take precedence. If they truly need to
be redefined, a custom linker script will have to be used.
(The custom script can just be a copy of this script with the
PROVIDE() qualifiers added).
In particular this means that ld -Ur does not work, because
the proper __CTOR_LIST__ set by ld -Ur is overridden by a
bogus __CTOR_LIST__ set by the final link. See PR 46. */
___CTOR_LIST__ = .;
__CTOR_LIST__ = .;
LONG (-1); LONG (-1);
KEEP (*(.ctors));
KEEP (*(.ctor));
KEEP (*(SORT_BY_NAME(.ctors.*)));
LONG (0); LONG (0);
/* See comment about __CTOR_LIST__ above. The same reasoning
applies here too. */
___DTOR_LIST__ = .;
__DTOR_LIST__ = .;
LONG (-1); LONG (-1);
KEEP (*(.dtors));
KEEP (*(.dtor));
KEEP (*(SORT_BY_NAME(.dtors.*)));
LONG (0); LONG (0);
KEEP (*(SORT_NONE(.fini)))
/* ??? Why is .gcc_exc here? */
*(.gcc_exc)
PROVIDE (etext = .);
KEEP (*(.gcc_except_table))
}
由于您不使用 mingw-w64 运行时,因此可以安全地删除属于
__CTOR_LIST__ 和 __DTOR_LIST__
的所有内容,只保留以下内容:
.text __image_base__ + ( __section_alignment__ < 0x1000 ? . : __section_alignment__ ) :
{
KEEP (*(SORT_NONE(.init)))
*(.text)
*(SORT(.text$*))
*(.text.*)
*(.gnu.linkonce.t.*)
*(.glue_7t)
*(.glue_7)
. = ALIGN(8);
KEEP (*(SORT_NONE(.fini)))
/* ??? Why is .gcc_exc here? */
*(.gcc_exc)
PROVIDE (etext = .);
KEEP (*(.gcc_except_table))
}
之后,您可以修改
.cargo/config.toml
文件以包含自定义链接器脚本的路径:
rustflags = ["-C", "link-args=-e _start -static -nostartfiles -Wl,-T,<path-to-your-script-dir>/i386pep.x"]
就是这样!编译项目后,您可以看到这些奇怪的数据现在消失了:
$ objdump -D -b binary -mi386 -Mx86-64 -Mintel -z r.bin
r.bin: file format binary
Disassembly of section .data:
00000000 <.data>:
0: b8 2a 00 00 00 mov eax,0x2a
5: c3 ret
[1] 我无法直接链接到任何“官方”仓库中的这个文件,因为它是在 MinGW/binutils 的构建过程中由模板生成的,不过您可以在此处看到相关痕迹。