我正在尝试使用 libcheck(https://github.com/libcheck/check)测试一个会调用 fork 和 exec 的程序。
当我正常运行程序时,它可以创建子进程,稍等一下,然后用 kill 发送 SIGTERM 来结束它,没有问题。但是,如果我在测试中执行相同的操作,则测试会失败并显示 Received signal 15 (Terminated)。看起来是测试进程被杀死了。当使用 strace 运行测试时,我甚至没有看到 kill 被执行。
当使用 CK_FORK=no 和 strace 运行测试时,kill 被执行了,但程序在 wait 期间中止。另外,我现在真的不知道该如何正确调试这个问题。
我已经从代码中删除了错误检查。
test.c(可以使用代码后面给出的 gcc 命令进行编译):
#include <check.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <paths.h>
#include <unistd.h>
START_TEST(test)
{
int process;
/* prevent duplicate output from the child process */
if (0 != fflush (NULL)) abort ();
process = fork ();
if (0 > process) abort ();
/* child process */
if (0 == process)
{
int null_fd;
/* reset signals */
if (SIG_ERR == signal (SIGTERM, SIG_DFL))
{
exit (EXIT_FAILURE);
}
/* Replace stderr with /dev/null, because SoX will
* spam the output with current status data. */
null_fd = open (_PATH_DEVNULL, O_WRONLY);
if (0 > null_fd)
{
exit (EXIT_FAILURE);
}
if (0 > dup2 (null_fd, STDERR_FILENO))
{
exit (EXIT_FAILURE);
}
if (0 > close (null_fd))
{
exit (EXIT_FAILURE);
}
execlp ("/usr/bin/play", (char *) NULL);
/* execvp failed */
exit (EXIT_FAILURE);
}
printf("kill: %d\n", process);
if (0 > kill (process, SIGTERM)) abort ();
if (0 > waitpid (process, NULL, 0)) abort ();
}
END_TEST
/* Builds the "Test" suite: a single test case, also named "Test",
 * that contains the `test` test function. */
Suite * suite (void)
{
  Suite *result = suite_create ("Test");
  TCase *core = tcase_create ("Test");

  /* Register the test with the core case, then the case with the suite. */
  tcase_add_test (core, test);
  suite_add_tcase (result, core);

  return result;
}
/* Runs the suite returned by suite() through a libcheck runner (passing
 * CK_NORMAL) and exits with EXIT_SUCCESS only when no test failed. */
int main (void)
{
  SRunner *runner = srunner_create (suite ());
  int failures;

  srunner_run_all (runner, CK_NORMAL);
  failures = srunner_ntests_failed (runner);
  srunner_free (runner);

  if (failures != 0)
    {
      return EXIT_FAILURE;
    }
  return EXIT_SUCCESS;
}
编译命令:
gcc test.c -lcheck_pic -pthread -lrt -lm -lsubunit
执行:
$ ./a.out
Running suite(s): Test
kill: 7047
0%: Checks: 1, Failures: 0, Errors: 1
test.c:11:E:Test:test:0: (after this point) Received signal 15 (Terminated)
奇怪的是,偶尔它确实可以工作,这似乎与当前系统负载相关。如果我的 CPU 几乎空闲,成功率约为 50%;当我的 CPU 处于高负载状态时,则几乎总是失败。成功时的输出和 strace 列在下面这段失败跟踪之后。
失败时的 strace:
$ strace -fe trace=%process,%signal ./a.out
execve("./a.out", ["./a.out"], 0x7ffc8a157ef0 /* 47 vars */) = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7fd6fd8c9690, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7fd6fd8c9730, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
rt_sigaction(SIGALRM, {sa_handler=0x5638b7e93b50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x5638b7e93b50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=0x5638b7e93b50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
Running suite(s): Test
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fd6fd5a0550) = 6995
strace: Process 6995 attached
[pid 6994] wait4(6995, <unfinished ...>
[pid 6995] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fd6fd5a0550) = 6996
strace: Process 6996 attached
kill: 6996
[pid 6995] kill(6996, SIGTERM) = 0
[pid 6996] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=6995, si_uid=1001} ---
[pid 6995] wait4(6996, <unfinished ...>
[pid 6996] rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
[pid 6996] kill(-6995, SIGTERM) = 0
[pid 6995] <... wait4 resumed>NULL, 0, NULL) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 6995] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=6996, si_uid=1001} ---
[pid 6996] kill(-6995, SIGTERM) = 0
[pid 6996] rt_sigreturn({mask=[]} <unfinished ...>
[pid 6995] rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, <unfinished ...>
[pid 6996] <... rt_sigreturn resumed>) = 140561350264128
[pid 6996] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=6996, si_uid=1001} ---
[pid 6995] <... rt_sigaction resumed>NULL, 8) = 0
[pid 6995] kill(-6995, SIGTERM) = 0
[pid 6995] kill(-6995, SIGTERM) = 0
[pid 6995] rt_sigreturn({mask=[]} <unfinished ...>
[pid 6996] +++ killed by SIGTERM +++
[pid 6995] <... rt_sigreturn resumed>) = -1 EINTR (Interrupted system call)
[pid 6995] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=6996, si_uid=1001} ---
[pid 6995] +++ killed by SIGTERM +++
<... wait4 resumed>[{WIFSIGNALED(s) && WTERMSIG(s) == SIGTERM}], 0, NULL) = 6995
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=6995, si_uid=1001, si_status=SIGTERM, si_utime=0, si_stime=0} ---
kill(-6995, SIGKILL) = -1 ESRCH (No such process)
0%: Checks: 1, Failures: 0, Errors: 1
test.c:11:E:Test:test:0: (after this point) Received signal 15 (Terminated)
rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fd6fd8d6140}, NULL, 8) = 0
exit_group(1) = ?
+++ exited with 1 +++
成功时的执行:
Running suite(s): Test
kill: 7074
100%: Checks: 1, Failures: 0, Errors: 0
成功时的 strace:
execve("./a.out", ["./a.out"], 0x7ffd0a5de0d0 /* 47 vars */) = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7f216e4fa690, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7f216e507140}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f216e4fa730, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f216e507140}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
rt_sigaction(SIGALRM, {sa_handler=0x55988f29db50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x55988f29db50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=0x55988f29db50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
Running suite(s): Test
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f216e31f450) = 137654
wait4(137654, strace: Process 137654 attached
<unfinished ...>
[pid 137654] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f216e31f450) = 137655
strace: Process 137655 attached
kill: 137655
[pid 137655] rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[TERM], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f216e358d60}, {sa_handler=0x55988f29db50, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, 8) = 0
[pid 137654] kill(137655, SIGTERM) = 0
[pid 137655] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=137654, si_uid=1001} ---
[pid 137654] wait4(137655, <unfinished ...>
[pid 137655] +++ killed by SIGTERM +++
[pid 137654] <... wait4 resumed>NULL, 0, NULL) = 137655
[pid 137654] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=137655, si_uid=1001, si_status=SIGTERM, si_utime=0, si_stime=0} ---
[pid 137654] exit_group(0) = ?
[pid 137654] +++ exited with 0 +++
<... wait4 resumed>[{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 137654
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=137654, si_uid=1001, si_status=0, si_utime=0, si_stime=0} ---
kill(-137654, SIGKILL) = -1 ESRCH (No such process)
100%: Checks: 1, Failures: 0, Errors: 0
rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, NULL, 8) = 0
rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, NULL, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f216e507140}, NULL, 8) = 0
exit_group(0) = ?
+++ exited with 0 +++
libcheck 安装了一个 SIGTERM 处理程序,它会杀死整个进程组。该处理程序被孙进程继承,而孙进程在 exec 您实际想要运行的程序之前就被终止了。您的 fork 模式运行包含三个进程:
A: a.out
 \
  B: (forked test suite)
   \
    C: (forked process in test case)
进程 C 在 exec 之前被 B 杀死,这意味着它仍然带有继承自 libcheck 的信号配置。于是它同样捕获 SIGTERM 并杀死整个进程组。