如何组织二进制解析器的代码?

问题描述 投票:0回答:1

我正在为一个类似 Ghidra 的逆向工程工具编写 Mach-O 64 位二进制解析器。我希望程序能根据文件格式中的标识符,以人类可读的方式标注出当前所处的位置。

让我给你举个例子:

0x100000004: cf fa ed fe
0x100000008: 0c 00 00 01
0x10000000c: 00 00 00 00
0x100000010: 02 00 00 00
0x100000014: 11 00 00 00
0x100000018: 20 04 00 00
0x10000001c: 85 00 20 00
0x100000020: 00 00 00 00
0x100000024: 19 00 00 00 LC_SEGMENT_64

这里,LC_SEGMENT_64 被标注在该加载命令起始那一行的旁边;我之所以知道这一点,是因为 LC_SEGMENT_64 的标识符是 0x19。但如果我对每个可能的 Mach-O 标识符都这样处理,代码就会变得一团糟。怎样才能在不写 5 万条 if-else 语句的情况下,以较好的方式实现这一点?

我目前的代码:

#include <errno.h>
#include <mach-o/loader.h>
#include <mach/machine.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Number of bytes read from the file per printed row. */
#define BUFFER_SIZE 4

/*
 * Print `msg` to stderr, followed by the description of the current errno.
 *
 * The body is wrapped in do { } while (0) and carries no trailing semicolon,
 * so `ERROR("...");` is exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define ERROR(msg)                                                \
  do {                                                            \
    fprintf(stderr, "ERROR: %s | %s\n", (msg), strerror(errno));  \
  } while (0)

void HexPrinter(uint32_t buffer, FILE *binary) {
  uint64_t mem_addr = 0x10000000;
  fread(&buffer, 1, BUFFER_SIZE, binary);
  if (buffer != MH_MAGIC_64) {
    ERROR("Not a 64-bit Mach-O binary");
    return;
  } else {
    printf("0x%llx: %02x %02x %02x %02x\n", mem_addr, (buffer & 0xFF),
           ((buffer >> 9) & 0xFF), ((buffer >> 16) & 0xFF),
           ((buffer >> 24) & 0xFF));
  }

  while ((fread(&buffer, 1, BUFFER_SIZE, binary)) == BUFFER_SIZE) {
    printf("0x%llx: %02x %02x %02x %02x\t", mem_addr, (buffer & 0xFF),
           ((buffer >> 9) & 0xFF), ((buffer >> 16) & 0xFF),
           ((buffer >> 24) & 0xFF));

/* I don't want to write one of these for each identifier */
    if (buffer == LC_SEGMENT_64) {
      printf("LC_SEGMENT_64\n");
    } else {
      printf("\n");
    }

    mem_addr += BUFFER_SIZE;
  }

  if (ferror(binary)) {
    ERROR("Error reading file");
    fclose(binary);
    return;
  }
}

/*
 * Entry point: open the file named by the first command-line argument and
 * dump it with HexPrinter.
 *
 * Returns 0 after the dump, or -1 if no pathname was given or the file could
 * not be opened.
 */
int main(int argc, char *argv[]) {
  /* HexPrinter receives this by value; give it a defined value to copy. */
  uint32_t buffer = 0;

  if (argc < 2 || !argv[1]) {
    /* A usage error is not an errno failure, so bypass ERROR(), which would
     * append an unrelated strerror() text. */
    fprintf(stderr, "Usage: ./nibBrev <pathname>\n");
    return (-1);
  }

  const char *pathname = argv[1];

  /* "rb": the input is binary data, so no text-mode translation may occur. */
  FILE *binary = fopen(pathname, "rb");
  if (!binary) {
    ERROR("Couldn't open file");
    return (-1);
  }

  HexPrinter(buffer, binary);

  fclose(binary);
  return 0;
}
c macos binary reverse-engineering arm64
1个回答
0
投票

如果内存不是特别紧张,你可以建立一个查找表:

// Sparse lookup table indexed by identifier value. Slots that are not named
// in the initializer are null pointers, which the lookup code tests for.
// Only values below 65536 can be used as an index into this table.
// NOTE(review): load commands carrying the LC_REQ_DYLD bit (0x80000000) would
// not fit in a table of this size — confirm against <mach-o/loader.h>.
const char *identifiers[65536] = {
    [LC_SEGMENT_64] = "LC_SEGMENT_64",
    [SOMETHING_ELSE] = "SOMETHING_ELSE",
    // ...
};

然后代替

// The approach being replaced: one comparison branch per identifier, each
// printing that identifier's name, with a bare newline when nothing matches.
if (buffer == LC_SEGMENT_64) {
      printf("LC_SEGMENT_64\n");
} else if (buffer == SOMETHING_ELSE) {
      printf("SOMETHING_ELSE\n");
} else if (buffer == ...) {
      printf("...\n");
} else {
      printf("\n");
}

您只需要一个 if:

// buffer is an arbitrary 32-bit word read from the file, so it must be
// range-checked before it is used as an index into the table.
const char *id = NULL;
if (buffer < sizeof identifiers / sizeof identifiers[0]) {
    id = identifiers[buffer]; // get a const char* from the table
}

if (id) {          // the table had a string for this value
    puts(id);      // print the string (puts appends the newline)
} else {           // out of range, or no name registered
    putchar('\n'); // print a newline
}
© www.soinside.com 2019 - 2024. All rights reserved.