我正在尝试在 CentOS Linux 上编写一个工具,用来跟踪所有被创建的进程以及它们运行的内容。本质上,我想跟进所有的 fork/clone,并输出每次 execve() 调用的完整命令行。strace 已经(部分地)做到了这一点,但它会截断调用和参数。我还想更好地了解 ptrace() 的工作原理。
因此,第一个障碍是弄清楚如何用 ptrace() 跟进 fork/clone,而不需要跟踪程序为每个子进程再 fork 一份自己。我深入研究了一下,弄清了做法:由于 Linux 上的 fork 是用 clone 实现的,我注意到 strace 会在 clone 系统调用的标志参数中置入一些额外的标志位,这样无需任何额外处理就能让子进程也被跟踪。
所以,本质上代码只是一个大循环:
/* Sketch of the tracer's event loop (pseudo-code; elided arguments shown as "..."). */
while (1) {
/* Block until some traced process reports a state change. */
int pid = wait3(-1,...);
/* process what happened */
/* Resume that process with PTRACE_SYSCALL so it runs until its next syscall stop. */
ptrace(PTRACE_SYSCALL, pid,...);
}
这对 /bin/sh 这样相对简单的进程有效,但有些进程会导致 wait() 无限期挂起。我唯一能确定的是:被我跟踪的进程在等待它的子进程(也就是跟踪程序的孙进程)时执行了 sys_rt_sigsuspend(),然后一切就卡死了。
我想知道有没有合理的办法来调试到底发生了什么。显然有什么东西阻止了进程树继续推进。
以下是出问题的程序的源代码:
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
/* For the clone flags
*/
#include <sched.h>
#include <errno.h>
#include <sys/ptrace.h>
#include <sys/user.h>
/* Defines our syscalls like
*/
#include <sys/syscall.h>
#include <sys/reg.h>
#include <stdio.h>
#include <signal.h>
#include <ctype.h>
#include <map>
using namespace std;
/* Scratch buffer that receives strings copied out of a traced process. */
char bufstr[4096];

/* Register slots (named in <sys/reg.h>) used with PTRACE_PEEKUSER /
 * PTRACE_POKEUSER: the accumulator and the first two syscall arguments.
 * Callers multiply the slot by sizeof(unsigned long) to get the offset.
 */
#if defined(__x86_64__)
#  define REG_ACC  RAX
#  define REG_ARG1 RDI
#  define REG_ARG2 RSI
#else
#  define REG_ACC  EAX
#  define REG_ARG1 EBX
#  define REG_ARG2 ECX
#endif
/* Trace control block: per-PID state for a process we are tracking.
 *
 *   pid_       the process id, or -1 for a default-constructed entry
 *   entering_  non-zero when the next syscall stop of this process is a
 *              syscall entry, zero when it is the matching syscall exit
 *
 * Both constructors initialise every member.  std::map::operator[]
 * default-constructs an entry for an unknown key, so the default
 * constructor must not leave entering_ indeterminate.
 */
class tcb {
    int pid_;
    int entering_;
public:
    tcb(int pid, int entering = 1) : pid_(pid), entering_(entering) {}
    tcb() : pid_(-1), entering_(1) {}
    /* Mutable access to the stored process id. */
    int& pid() { return pid_; }
    /* Mutable access to the entry/exit flag. */
    int& entering() { return entering_; }
};
/* Fetch a NUL-terminated string from process (pid) at address (ptr).
 *
 * buf   destination buffer
 * size  total capacity of buf in bytes, including the terminating NUL
 *
 * At most size - 1 bytes are copied and buf is always NUL-terminated
 * (when size > 0).  Copying stops at the first NUL byte, when the buffer
 * is full, or when the tracee's memory cannot be read.
 *
 * Returns the number of bytes copied, not counting the terminator.
 */
int get_string(int pid, long ptr, char *buf, int size)
{
    int j = 0;

    if (buf == NULL || size <= 0)
        return 0;

    while (j < size - 1) {
        long data;
        const char *p = (const char *) &data;

        /* PEEKTEXT returns the word itself, so -1 is a legal value;
         * only errno distinguishes it from a failure. */
        errno = 0;
        data = ptrace(PTRACE_PEEKTEXT, pid, (void *) ptr, NULL);
        if (data == -1 && errno != 0)
            break;

        for (size_t i = 0; i < sizeof(data) && j < size - 1; i++, j++) {
            buf[j] = p[i];
            if (buf[j] == '\0')
                return j;
        }
        ptr += sizeof(data);
    }

    buf[j] = '\0';
    return j;
}
int main(int argc, char *argv[])
{
int status = 0;
long scno = 0;
// int entering = 1;
struct user_regs_struct regs;
map<int, tcb> pidTable;
struct sigaction sa;
/* Setup
*/
int pid = fork();
if (!pid && argc) {
if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
perror("ptrace(PTRACE_ME,... ");
exit(1);
}
execvp(argv[1], &argv[1]);
} else {
sa.sa_flags = 0;
sa.sa_handler = SIG_DFL;
sigemptyset(&sa.sa_mask);
sigaction(SIGCHLD, &sa, NULL);
waitpid(pid, &status, 0);
pidTable[pid] = tcb(pid);
fprintf(stderr, "pid is %d\n", pidTable[pid].pid());
while (!pidTable.empty()) {
if (pid > 0) {
//fprintf(stderr, "%d: Restarting %d\n", getpid(), pid);
if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) {
perror("ptrace(PTRACE_SYSCALL,...");
exit(1);
}
}
// waitpid(pid, &status, 0);
// pid = waitpid(-1, &status, 0);
pid = wait3(&status, __WALL, 0);
// fprintf(stderr, "Pid from wait is %d\n", pid);
if (pid < 0) {
perror("waitpid");
break;
} else {
/* fprintf(stderr, "%d: Status is: ", pid); */
/*
if (WIFEXITED(status)) {
fprintf(stderr, "exited");
} else if (WIFSIGNALED(status)) {
fprintf(stderr, "exited");
} else if (WIFSTOPPED(status), "stopped") {
fprintf(stderr, "stopped");
} else if (WIFCONTINUED(status)) {
fprintf(stderr, "continued");
}
fprintf(stderr, "\n");
*/
if (WIFEXITED(status) || WIFSIGNALED(status)) {
/* Probably empty the table here */
pidTable.erase(pid);
fprintf(stderr, "Detect process term/kill %d\n", pid);
/* if (ptrace(PTRACE_DETACH, pid, 0, 0) < 0) {
perror("ptrace");
} */
pid = -1;
continue;
}
}
ptrace(PTRACE_GETREGS, pid, 0, ®s);
#ifdef __x86_64__
scno = regs.orig_rax;
#else
scno = regs.orig_eax;
#endif /* __x86_64__ */
if (scno == SYS_execve) {
fprintf(stderr, "%d: Exec branch\n", pid);
if (pidTable[pid].entering()) {
long ldata, ptr, ptr1;
ptrace(PTRACE_GETREGS, pid, 0, ®s);
#ifdef __x86_64__
ptr = regs.rdi;
#else
ptr = regs.ebx;
#endif /* __x86_64__ */
fprintf(stderr, "%d: exec(", pid);
if (ptr) {
get_string(pid, ptr, bufstr, sizeof(bufstr));
fprintf(stderr, "%s", bufstr);
}
#ifdef __x86_64__
ptr1 = regs.rsi;
#else
ptr1 = regs.ecx;
#endif /* __x86_64__ */
for (; ptr1; ptr1 += sizeof(unsigned long)) {
ptr = ptr1;
/* Indirect through ptr since we have char *argv[] */
ptr = ptrace(PTRACE_PEEKTEXT, pid, (void *) ptr, 0);
if (!ptr)
break;
get_string(pid, ptr, bufstr, sizeof(bufstr));
fprintf(stderr, ", %s", bufstr);
}
fprintf(stderr, ")\n");
pidTable[pid].entering() = 0;
}
else {
long acc = ptrace(PTRACE_PEEKUSER, pid, sizeof(unsigned long) * REG_ACC, 0);
pidTable[pid].entering() = 1;
fprintf(stderr, "%d: Leaving exec: eax is %ld\n", pid, acc);
}
} else if (scno == SYS_fork || scno == SYS_clone) {
fprintf(stderr, "%d: fork/clone branch\n", pid);
if (pidTable[pid].entering()) {
long flags = ptrace(PTRACE_PEEKUSER, pid, (sizeof(unsigned long) * REG_ARG1), 0);
fprintf(stderr, "%d: Entering fork/clone\n", pid);
pidTable[pid].entering() = 0;
if (ptrace(PTRACE_POKEUSER, pid, (sizeof(unsigned long) * REG_ARG1), flags | CLONE_PTRACE &
~(flags & CLONE_VFORK ?
CLONE_VFORK | CLONE_VM : 0)) < 0) {
perror("ptrace");
}
if (ptrace(PTRACE_POKEUSER, pid, (sizeof(unsigned long) * REG_ARG2), 0) < 0) {
perror("ptrace");
}
} else {
// int child;
ptrace(PTRACE_GETREGS, pid, 0, ®s);
#ifdef __x86_64__
fprintf(stderr, "%d: Leaving fork/clone: rax = %ld\n", pid, regs.rax);
#else
fprintf(stderr, "%d: Leaving fork/clone: eax = %ld\n", pid, regs.eax);
#endif
pidTable[pid].entering() = 1;
#ifdef __x86_64__
if (regs.rax <= 0) {
#else
if (regs.eax <= 0) {
#endif
continue;
}
#ifdef __x86_64__
int newpid = regs.rax;
#else
int newpid = regs.eax;
#endif
pidTable[newpid] = tcb(newpid, 0);
//pidTable[newpid] = tcb(newpid, 1);
//pidTable[newpid] = pidTable[pid];
fprintf(stderr, "%d: forked child is %d\n", pid, newpid);
}
} else if (scno == SYS_exit) {
fprintf(stderr, "%d: exit syscall detected\n", pid);
} else if (scno < 0) {
fprintf(stderr, "Negative syscall number for %d\n", pid);
exit(1);
} else {
fprintf(stderr, "%d: Scno is %ld\n", pid, scno);
}
}
}
return 0;
}
ptrace 的 PTRACE_SETOPTIONS 请求提供了以下选项标志:PTRACE_O_TRACEFORK、PTRACE_O_TRACEEXEC 和 PTRACE_O_TRACEEXIT。详情请参阅 ptrace 的手册页。
顺便说一下,strace -f -s99999 -e trace=clone,execve
似乎就能给出高质量的结果。要查看 strace 自身的行为,可以试试 systemtap,例如:
# stap -e 'probe syscall.ptrace {if (execname()=="strace") log(argstr)}' -c 'strace COMMAND'
(当前的systemtap并没有完全正确地打印ptrace参数。)
或者你可以用 strace 来跟踪 strace:
strace -e trace=ptrace strace -f -s99999 -e trace=clone,execve COMMAND
我遇到了完全相同的问题,并通过用 strace 跟踪 strace 找到了解决方案。
通过 waitpid() 收到事件后,需要调用
ptrace(PTRACE_GETSIGINFO, pid, NULL, &sig_data)
如果 sig_data.si_signo 是 SIGTRAP,就照你现在的方式处理;否则,需要保存这个信号编号,并把它作为最后一个参数传给
ptrace(PTRACE_SYSCALL, pid, 0, sig)
这样,信号(在我的例子中是 SIGCHLD)才会被正确地转发给被跟踪进程(tracee)。