sigaction 处理程序正在无限期地等待锁定

问题描述 投票:0回答:1

我最近编写了一个信号处理程序,它使用 execinfo.h 中的 backtrace(回溯)函数。它在 macOS 上运行良好,但在 Linux(Ubuntu/Debian)上使用时,会无限期地等待一个锁。不确定这是否有帮助:我的多线程程序(pthread)使用 rocksdb 来存储数据,并且我故意在 rocksdb 中保留了一个段错误,以便在 rocksdb 端出现问题时可以测试我的信号处理程序,但我无法查明它为什么一直在等待锁。

这是我在 gdb 上得到的堆栈跟踪:

#0  futex_wait (private=0, expected=2, futex_word=0x77e088a1ac80 <main_arena>) at ../sysdeps/nptl/futex-internal.h:146
#1  __GI___lll_lock_wait_private (futex=futex@entry=0x77e088a1ac80 <main_arena>) at ./nptl/lowlevellock.c:34
#2  0x000077e0888a53c8 in __GI___libc_malloc (bytes=408) at ./malloc/malloc.c:3327
#3  0x000077e088c024a3 in malloc (size=408) at ../include/rtld-malloc.h:56
#4  _dl_scope_free (old=old@entry=0x5660325219f0) at ./elf/dl-scope.c:34
#5  0x000077e088bf3308 in _dl_map_object_deps (map=map@entry=0x566032520dc0, preloads=preloads@entry=0x0, npreloads=npreloads@entry=0, 
    trace_mode=trace_mode@entry=0, open_mode=open_mode@entry=-2147483648) at ./elf/dl-deps.c:635
#6  0x000077e088bfda0f in dl_open_worker_begin (a=a@entry=0x7fff4a7a5010) at ./elf/dl-open.c:592
#7  0x000077e088974a98 in __GI__dl_catch_exception (exception=exception@entry=0x7fff4a7a4e70, operate=operate@entry=0x77e088bfd900 <dl_open_worker_begin>, 
    args=args@entry=0x7fff4a7a5010) at ./elf/dl-error-skeleton.c:208
#8  0x000077e088bfcf9a in dl_open_worker (a=a@entry=0x7fff4a7a5010) at ./elf/dl-open.c:782
#9  0x000077e088974a98 in __GI__dl_catch_exception (exception=exception@entry=0x7fff4a7a4ff0, operate=operate@entry=0x77e088bfcf60 <dl_open_worker>, 
    args=args@entry=0x7fff4a7a5010) at ./elf/dl-error-skeleton.c:208
#10 0x000077e088bfd34e in _dl_open (file=<optimized out>, mode=-2147483646, caller_dlopen=0x77e088925611 <__GI___libc_unwind_link_get+81>, nsid=-2, argc=3, 
    argv=<optimized out>, env=0x5660324f9fe0) at ./elf/dl-open.c:883
#11 0x000077e088974e01 in do_dlopen (ptr=ptr@entry=0x7fff4a7a5240) at ./elf/dl-libc.c:95
#12 0x000077e088974a98 in __GI__dl_catch_exception (exception=exception@entry=0x7fff4a7a51e0, operate=<optimized out>, args=<optimized out>)
    at ./elf/dl-error-skeleton.c:208
#13 0x000077e088974b63 in __GI__dl_catch_error (objname=0x7fff4a7a5230, errstring=0x7fff4a7a5238, mallocedp=0x7fff4a7a522f, operate=<optimized out>, 
    args=<optimized out>) at ./elf/dl-error-skeleton.c:227
#14 0x000077e088974f37 in dlerror_run (args=0x7fff4a7a5240, operate=0x77e088974dc0 <do_dlopen>) at ./elf/dl-libc.c:45
#15 __libc_dlopen_mode (name=name@entry=0x77e0889db527 "libgcc_s.so.1", mode=mode@entry=-2147483646) at ./elf/dl-libc.c:162
#16 0x000077e088925611 in __GI___libc_unwind_link_get () at ./misc/unwind-link.c:50
#17 __GI___libc_unwind_link_get () at ./misc/unwind-link.c:40
#18 0x000077e088933b77 in __GI___backtrace (array=array@entry=0x77e088af0000 <backtrace_frames>, size=size@entry=1) at ./debug/backtrace.c:69
#19 0x000077e088a65f92 in dumpBackTrace () at my_faultHandler.c:366
#20 0x000077e088a66027 in faultHandler (signo=6) at my_faultHandler.c:344
#21 <signal handler called>
#22 __pthread_kill_implementation (no_tid=0, signo=6, threadid=131806249563968) at ./nptl/pthread_kill.c:44
#23 __pthread_kill_internal (signo=6, threadid=131806249563968) at ./nptl/pthread_kill.c:78
#24 __GI___pthread_kill (threadid=131806249563968, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#25 0x000077e088842476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#26 0x000077e0888287f3 in __GI_abort () at ./stdlib/abort.c:79
#27 0x000077e088889676 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x77e0889dbb77 "%s\n") at ../sysdeps/posix/libc_fatal.c:155
#28 0x000077e0888a0cfc in malloc_printerr (str=str@entry=0x77e0889de5b8 "malloc_consolidate(): unaligned fastbin chunk detected") at ./malloc/malloc.c:5664
#29 0x000077e0888a198c in malloc_consolidate (av=av@entry=0x77e088a1ac80 <main_arena>) at ./malloc/malloc.c:4750
#30 0x000077e0888a3bdb in _int_malloc (av=av@entry=0x77e088a1ac80 <main_arena>, bytes=bytes@entry=32816) at ./malloc/malloc.c:3965
#31 0x000077e0888a5139 in __GI___libc_malloc (bytes=bytes@entry=32816) at ./malloc/malloc.c:3329
#32 0x000077e0888e630b in __alloc_dir (statp=0x7fff4a7a5ec0, flags=0, close_fd=true, fd=39) at ../sysdeps/unix/sysv/linux/opendir.c:115
#33 opendir_tail (fd=39) at ../sysdeps/unix/sysv/linux/opendir.c:63
#34 __opendir (name=<optimized out>) at ../sysdeps/unix/sysv/linux/opendir.c:86
#35 0x000077e087f93748 in rocksdb::(anonymous namespace)::PosixEnv::GetChildren (this=<optimized out>, 
    dir="/home/dummy/rocks", result=0x7fff4a7a6080)
    at /usr/include/c++/9/bits/basic_string.h:2309
#36 0x000077e087eeaae0 in rocksdb::DBImpl::FindObsoleteFiles (this=this@entry=0x56603283fc40, job_context=job_context@entry=0x7fff4a7a6180, force=force@entry=true, 
--Type <RET> for more, q to quit, c to continue without paging--
    no_full_scan=no_full_scan@entry=false) at db/db_impl_files.cc:200
#37 0x000077e087eccfd3 in rocksdb::DBImpl::~DBImpl (this=0x56603283fc40, __in_chrg=<optimized out>) at db/db_impl.cc:308
#38 0x000077e087ecd3f6 in rocksdb::DBImpl::~DBImpl (this=0x56603283fc40, __in_chrg=<optimized out>) at db/db_impl.cc:357
#39 0x000077e087e66e9d in rocksdb_close (db=0x5660328a2b20) at db/c.cc:627

信号处理程序代码:

void RegisterFaultHandler(void)
{
    struct sigaction bt_action;

    sigemptyset(&bt_action.sa_mask);
    bt_action.sa_handler = &faultHandler;
    bt_action.sa_flags   = SA_RESTART | SA_ONSTACK;

    if (sigaction(SIGSEGV, &bt_action, prev_action + SIGSEGV) || sigaction(SIGBUS, &bt_action, prev_action + SIGBUS) ||
        sigaction(SIGILL, &bt_action, prev_action + SIGILL) || sigaction(SIGABRT, &bt_action, prev_action + SIGABRT) ||
        sigaction(SIGFPE, &bt_action, prev_action + SIGFPE) || sigaction(SIGSYS, &bt_action, prev_action + SIGSYS))
    {
        int savedErrno = errno;
        exit(1);
    }
}

/*
 * Put back the dispositions stored in prev_action for every fatal signal
 * hooked by this module (SIGSEGV, SIGBUS, SIGILL, SIGABRT, SIGFPE, SIGSYS).
 * prev_action is indexed by signal number. The result of each sigaction()
 * call is not checked.
 */
static void unRegisterFaultHandler()
{
    static const int hooked_signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT, SIGFPE, SIGSYS};
    const unsigned   hooked_count     = sizeof(hooked_signals) / sizeof(hooked_signals[0]);
    unsigned         idx;

    for (idx = 0; idx < hooked_count; ++idx)
    {
        const int signum = hooked_signals[idx];

        /* Third argument NULL: the disposition being replaced is not saved. */
        sigaction(signum, prev_action + signum, NULL);
    }
}

/*
 * Signal handler installed for the fatal signals.
 *
 * signo: number of the signal being handled.
 *
 * The three steps below are order-dependent: the previous dispositions must
 * be back in place before the signal is raised again.
 */
static void faultHandler(int signo)
{
    /* 1. Restore the previously saved dispositions for all hooked signals. */
    unRegisterFaultHandler();

    /* 2. Write the current call stack to the backtrace file. */
    dumpBackTrace();

    /* 3. Re-raise the same signal so the restored disposition handles it. */
    (void)raise(signo);
}

/*
 * Write up to MAX_FRAMES return addresses of the calling thread, symbolised
 * by backtrace_symbols_fd(), to the file returned by openBackTraceFile().
 * If that file cannot be opened, a short message is written to stderr
 * instead.
 *
 * NOTE(review): this is called from a signal handler. backtrace() is only
 * free of malloc() once libgcc has already been loaded (see backtrace(3)),
 * so make sure it has been called once from normal context beforehand.
 */
static void dumpBackTrace(void)
{
    enum { MAX_FRAMES = 10 };

    int bt_fd = openBackTraceFile(); /* This will just open my file with open() system call */

    if (bt_fd >= 0)
    {
        /* static: the frame buffer is not placed on the handler's stack. */
        static void *backtrace_frames[MAX_FRAMES];
        int          size = backtrace(backtrace_frames, MAX_FRAMES);

        /* Writes straight to the descriptor rather than returning strings. */
        backtrace_symbols_fd(backtrace_frames, size, bt_fd);

        close(bt_fd);
    }
    else
    {
        static const char error[] = "Cannot open backtrace file\n";

        /* sizeof - 1: do not emit the string's terminating NUL byte. */
        (void)write(STDERR_FILENO, error, sizeof(error) - 1);
    }
}

我知道在第 3 帧中调用 malloc 可能是原因,因为它不安全,但我不知道如何解决这个问题。尝试在互联网上搜索答案,我只能找到 malloc 部分。如果您需要更多信息,请告诉我。

编辑-1: 仅当在rocksdb端发生分段错误并且我的程序中没有收到任何分段错误时,我才会遇到此问题。我认为这可能是由于 malloc_consolidate 发生错误而 backtrace 本身再次调用 malloc

c++ c signals rocksdb
1个回答
0
投票

您的信号处理程序从根本上被破坏了 - 它不是异步信号安全的。

C11 7.1.4 库函数的使用,第 4 段

标准库中的函数不保证可重入,并且可能会修改具有静态或线程存储期的对象。[脚注 188]

注意脚注188的链接

  1. 因此,信号处理程序通常不能调用标准库函数。

根据 POSIX 7 的“信号概念”一节以及 Linux 的 signal-safety 手册页,只有一组有限的“异步信号安全”函数可以从信号处理程序中调用。

您的回溯特别感兴趣的是这一行:

#3  0x000077e088c024a3 in malloc (size=408) at ../include/rtld-malloc.h:56

malloc() 不在任何异步信号安全函数列表中,因此从信号处理程序中调用 malloc()(即使是间接调用)是不安全的,并且可能会导致问题,例如死锁。

© www.soinside.com 2019 - 2024. All rights reserved.