Unable to make sys_call_table read-write on Oracle Linux 8 ARM64


Kernel version: 5.15.0-205.149.5.1.el8uek.aarch64 (the entire 5.15 series). OS: Oracle Linux 8 ARM64.

I'm trying to hook the sys_call_table, and to do that I'm changing the read/write permissions of the page containing it. This code fails (set_page_rw() returns -22):

// SPDX-License-Identifier: GPL-3.0
#include <linux/init.h>     // module_{init,exit}()
#include <linux/module.h>   // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h>   // printk(), pr_*()
#include <linux/kallsyms.h> // kallsyms_lookup_name()
#include <asm/syscall.h>    // syscall_fn_t, __NR_*
#include <asm/ptrace.h>     // struct pt_regs
#include <asm/tlbflush.h>   // flush_tlb_kernel_range()
#include <asm/pgtable.h>    // {clear,set}_pte_bit(), set_pte()
#include <linux/vmalloc.h>  // vm_unmap_aliases()
#include <linux/mm.h>       // struct mm_struct, apply_to_page_range()
#include <linux/kconfig.h>  // IS_ENABLED()

#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

static struct mm_struct *init_mm_ptr;
static syscall_fn_t *syscall_table;
static syscall_fn_t original_read;

/********** HELPERS **********/

// From arch/arm64/mm/pageattr.c.
struct page_change_data {
    pgprot_t set_mask;
    pgprot_t clear_mask;
};

// From arch/arm64/mm/pageattr.c.
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
    struct page_change_data *cdata = data;
    pte_t pte = READ_ONCE(*ptep);

    pte = clear_pte_bit(pte, cdata->clear_mask);
    pte = set_pte_bit(pte, cdata->set_mask);

    set_pte(ptep, pte);
    return 0;
}

// From arch/arm64/mm/pageattr.c.
static int __change_memory_common(unsigned long start, unsigned long size,
                  pgprot_t set_mask, pgprot_t clear_mask)
{
    struct page_change_data data;
    int ret;

    data.set_mask = set_mask;
    data.clear_mask = clear_mask;

    ret = apply_to_page_range(init_mm_ptr, start, size, change_page_range, &data);

    flush_tlb_kernel_range(start, start + size);
    return ret;
}

// Simplified set_memory_rw() from arch/arm64/mm/pageattr.c.
static int set_page_rw(unsigned long addr)
{
    vm_unmap_aliases();    
    return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_WRITE), __pgprot(PTE_RDONLY));
}

// Simplified set_memory_ro() from arch/arm64/mm/pageattr.c.
static int set_page_ro(unsigned long addr)
{
    vm_unmap_aliases();
    return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_RDONLY), __pgprot(PTE_WRITE));
}

/********** ACTUAL MODULE **********/

static long myread(const struct pt_regs *regs)
{
    pr_info("read() called\n");
    return original_read(regs);
}

static int __init modinit(void)
{
    int res;

    pr_info("init\n");

    // Shouldn't fail.
    init_mm_ptr = (struct mm_struct *)kallsyms_lookup_name("init_mm");
    syscall_table = (syscall_fn_t *)kallsyms_lookup_name("sys_call_table");

    original_read = syscall_table[__NR_read];

    res = set_page_rw((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_rw() failed: %d\n", res);
        return res;
    }

    syscall_table[__NR_read] = myread;

    res = set_page_ro((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_ro() failed: %d\n", res);
        return res;
    }

    pr_info("init done\n");

    return 0;
}

static void __exit modexit(void)
{
    int res;

    pr_info("exit\n");

    res = set_page_rw((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_rw() failed: %d\n", res);
        return;
    }

    syscall_table[__NR_read] = original_read;

    res = set_page_ro((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0)
        pr_err("set_page_ro() failed: %d\n", res);

    pr_info("goodbye\n");
}

module_init(modinit);
module_exit(modexit);
MODULE_VERSION("0.1");
MODULE_LICENSE("GPL");

(Adapted from https://stackoverflow.com/a/61465861/17435873)

However, the same code works fine on kernel 5.4.17-2136.331.7.el8uek.aarch64 (the entire 5.4 series and earlier).

As a sanity check, I also allocated a page of kernel memory with vmalloc(); I can flip that page between read-write and read-only using the same helpers.

Code:

char *foo;

static int __init modinit(void)
{
    int res;

    pr_info("init\n");

    // Shouldn't fail.
    init_mm_ptr = (struct mm_struct *)kallsyms_lookup_name("init_mm");
    syscall_table = (syscall_fn_t *)kallsyms_lookup_name("sys_call_table");

    foo = vmalloc(PAGE_SIZE);
    res = set_page_ro((unsigned long)foo); // Returns 0 (success)
    foo[420] = '1';    // Crashed here
    res = set_page_rw((unsigned long)foo);
    foo[420] = '1';    // No crashes, set_page_rw returns 0 (success)

    pr_info("init done\n");

    return 0;
}

Update:

So I dug into these functions: apply_to_page_range, apply_to_p4d_range, apply_to_pud_range, apply_to_pmd_range and apply_to_pte_range (copying their sources into custom versions of each).

Call path:

  1. apply_to_pmd_range
  2. pmd_leaf (the if (WARN_ON_ONCE(my_pmd_leaf(*pmd))) check inside my_apply_to_pmd_range)
  3. !pmd_table (because #define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd)))

!pmd_table returns 1 for the sys_call_table page but 0 for the vmalloc page (the only difference between the two).

Custom helper:

int my_pmd_leaf(pmd_t pmd)
{
    int a1, a2, a3;

    printk("inside %s\n", __func__);
    a1 = pmd_present(pmd);
    a2 = !pmd_table(pmd);
    a3 = a1 && a2;
    printk("a1=%d, a2=%d, a3=%d\n", a1, a2, a3);
    return a3;
}

Output:

vmalloc page
[71627.367740] inside my_pmd_leaf
[71627.369595] a1=1, a2=0, a3=0

sys_call_table
[71756.857568] inside my_pmd_leaf
[71756.859347] a1=1, a2=1, a3=1
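
For context (not part of my module): on arm64, pmd_table() is just a type-field check on the PMD descriptor, so a PMD that maps a 2 MiB block instead of pointing to a PTE table is "present but not a table", i.e. a leaf. Paraphrased from arch/arm64/include/asm/pgtable.h (5.15 era; exact definitions may vary between versions):

// Paraphrased from arch/arm64/include/asm/pgtable.h (5.15 era), shown only
// to explain the a2 = !pmd_table(pmd) result above.
#define pmd_table(pmd)  ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
#define pmd_sect(pmd)   ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_SECT)
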
Tags: c linux-kernel hook system-calls rootkit

1 Answer

I ran the exact same kernel under QEMU and tested the module. If you look at the kernel log, the problem is easy to spot:

[   31.115415] test: init
[   31.159343] ------------[ cut here ]------------
[   31.159451] WARNING: CPU: 0 PID: 131 at mm/memory.c:2743 apply_to_pmd_range+0xf8/0x1d0
[   31.159654] Modules linked in: test(OE+)
[   31.159974] CPU: 0 PID: 131 Comm: insmod Tainted: G           OE     5.15.0-205.149.5.1.el8uek.aarch64 #2
[   31.160186] Hardware name: linux,dummy-virt (DT)
[   31.160383] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   31.160512] pc : apply_to_pmd_range+0xf8/0x1d0
[   31.160617] lr : apply_to_pmd_range+0x50/0x1d0
[   31.160754] sp : ffff80000bedba00
[   31.160867] x29: ffff80000bedba00 x28: ffff0000ffffe250 x27: ffff800009520000
[   31.161086] x26: 0000000000000000 x25: ffff80000951ffff x24: ffff80000bedbb88
[   31.161256] x23: ffff8000010f0000 x22: ffff80000ac244a8 x21: ffff80000bedbae4
[   31.161369] x20: 0000000000000001 x19: ffff800009520000 x18: 0000000000000000
[   31.161482] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[   31.161592] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[   31.161701] x11: 0000000000000000 x10: 0000000000000000 x9 : ffff80000837d010
[   31.161829] x8 : 0000000000000000 x7 : ffff80000bedbae4 x6 : 0000000000000001
[   31.161942] x5 : ffff80000bedbb88 x4 : ffff8000010f0000 x3 : 0c00000000000001
[   31.162055] x2 : ffff80000951f000 x1 : 0000000000000001 x0 : 0060000041600781
[   31.162280] Call trace:
[   31.162468]  apply_to_pmd_range+0xf8/0x1d0
[   31.162567]  apply_to_pud_range+0x9c/0x1f4
[   31.162637]  __apply_to_page_range+0xb8/0x190
[   31.162707]  apply_to_page_range+0x1c/0x44
[   31.162772]  __change_memory_common.constprop.0+0x58/0xe0 [test]
[   31.163198]  set_page_rw+0x30/0x50 [test]
[   31.163343]  modinit+0x78/0x1000 [test]
[   31.163486]  do_one_initcall+0x64/0x200
[   31.163556]  do_init_module+0x50/0x27c
[   31.163618]  load_module+0xa20/0xb70
[   31.163678]  __do_sys_init_module+0xe8/0x180
[   31.163748]  __arm64_sys_init_module+0x24/0x40
[   31.163814]  invoke_syscall+0x50/0x15c
[   31.163871]  el0_svc_common+0x48/0x144
[   31.163925]  do_el0_svc+0x30/0xe0
[   31.163976]  el0_svc+0x30/0xf0
[   31.164029]  el0t_64_sync_handler+0xc4/0x148
[   31.164091]  el0t_64_sync+0x1a4/0x1a8
[   31.164238] ---[ end trace 202353dcbe129dc1 ]---
[   31.164562] test: set_page_rw() failed: -22

You are most likely triggering the WARN_ON_ONCE(pmd_leaf(*pmd)) check in apply_to_pmd_range() (see here). This is because the kernel page tables map the syscall table inside a huge page (which makes sense, as it saves time and memory), but __change_memory_common() simply does not handle huge pages: whenever it encounters a huge mapping (pud_leaf() or pmd_leaf()), it bails out with -EINVAL.
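
This is (roughly) the relevant part of the stock apply_to_pmd_range() on 5.15-era kernels; paraphrased and trimmed here, so check your exact sources. A block (huge) PMD is simply rejected, which is the -22 seen in the log above:

// Paraphrased (and trimmed) from apply_to_pmd_range() in mm/memory.c, 5.15 era.
do {
    next = pmd_addr_end(addr, end);
    if (pmd_none(*pmd) && !create)
        continue;
    if (WARN_ON_ONCE(pmd_leaf(*pmd)))
        return -EINVAL;
    /* ... pmd_bad() handling trimmed ... */
    err = apply_to_pte_range(mm, pmd, addr, next, fn, data, create, mask);
    if (err)
        break;
} while (pmd++, addr = next, addr != end);
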

I re-implemented those functions, passing down an extra callback that is invoked whenever a huge (leaf) PMD mapping, i.e. pmd_leaf(), is found. I did not bother implementing the same for PUDs, since there are no convenient helpers for working with PUDs, but it would technically be doable as well.
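
The key change is in the reimplemented apply_to_pmd_range(): a leaf PMD is no longer an error, it is handed to the extra callback. Excerpted from the full listing below:

// Extra callback type for leaf PMDs (pte_fn_t already exists in <linux/mm.h>).
typedef int (*pmd_fn_t)(pmd_t *pmd, unsigned long addr, void *data);

// Inside the reimplemented apply_to_pmd_range():
if (pmd_leaf(*pmd)) {
    if (!fn_pmd || pmd_none(*pmd))
        continue;
    err = fn_pmd(pmd, addr, data);
    if (err)
        break;
} else {
    if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd)))
        continue;
    err = apply_to_pte_range(mm, pmd, addr, next, fn_pte, data, mask);
    if (err)
        break;
}
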

Also note that on this kernel kallsyms_lookup_name() is not exported, so I used the kprobes trick described here to find its address at runtime.
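
The gist of that trick (the full module below does this inline in modinit(); the standalone helper name here is mine) is to register and immediately unregister a kprobe on the symbol, since a successful register_kprobe() resolves the symbol's address into kp.addr:

#include <linux/kprobes.h>  // register_kprobe(), unregister_kprobe()

typedef unsigned long (*kallsyms_lookup_name_t)(const char *name);

// Resolve kallsyms_lookup_name() itself via a throwaway kprobe.
static kallsyms_lookup_name_t lookup_kallsyms_lookup_name(void)
{
    struct kprobe kp = { .symbol_name = "kallsyms_lookup_name" };
    kallsyms_lookup_name_t fn = NULL;

    if (register_kprobe(&kp) == 0) {    // kp.addr now holds the symbol address
        fn = (kallsyms_lookup_name_t)kp.addr;
        unregister_kprobe(&kp);
    }
    return fn;
}
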

Here is the updated code, tested under QEMU on kernel 5.15.0-205.149.5.1.el8uek.aarch64, where it seems to work fine.

// SPDX-License-Identifier: GPL-3.0
#include <linux/init.h>     // module_{init,exit}()
#include <linux/module.h>   // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h>   // printk(), pr_*()
#include <linux/kallsyms.h> // kallsyms_lookup_name()
#include <asm/syscall.h>    // syscall_fn_t, __NR_*
#include <asm/ptrace.h>     // struct pt_regs
#include <asm/tlbflush.h>   // flush_tlb_kernel_range()
#include <asm/pgtable.h>    // {clear,set}_pte_bit(), set_pte()
#include <linux/vmalloc.h>  // vm_unmap_aliases()
#include <linux/mm.h>       // struct mm_struct, apply_to_page_range()
#include <linux/kprobes.h>  // register_kprobe(), unregister_kprobe()

#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

static struct mm_struct *init_mm_ptr;
static syscall_fn_t *syscall_table;
static syscall_fn_t original_read;

/***** HELPERS ****************************************************************/

/**
 * This is an enhanced implementation of __apply_to_page_range() that is also
 * capable of handling huge PMD mappings (pmd_leaf()). The original
 * implementation of __apply_to_page_range() only handles last-level PTEs, and
 * fails with -EINVAL for PMD mappings. This implementation takes 2 function
 * pointers instead of a single one:
 *
 *   - pte_fn_t fn_pte: function to apply changes to a leaf PTE
 *   - pmd_fn_t fn_pmd: function to apply changes to a leaf PMD
 */

// pte_fn_t already present in <linux/mm.h>
typedef int (*pmd_fn_t)(pmd_t *pmd, unsigned long addr, void *data);
typedef int (*pud_fn_t)(pud_t *pud, unsigned long addr, void *data);

// From arch/arm64/mm/hugetlbpage.c
int pmd_huge(pmd_t pmd)
{
    return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

// Adapted from arch/arm64/mm/hugetlbpage.c
int pud_huge(pud_t pud)
{
#if CONFIG_PGTABLE_LEVELS == 2
    return 0;
#else
    return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#endif
}

// From arch/arm64/mm/pageattr.c.
struct page_change_data {
    pgprot_t set_mask;
    pgprot_t clear_mask;
};

// From arch/arm64/mm/pageattr.c.
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
    struct page_change_data *cdata = data;
    pte_t pte = READ_ONCE(*ptep);

    pte = clear_pte_bit(pte, cdata->clear_mask);
    pte = set_pte_bit(pte, cdata->set_mask);

    set_pte(ptep, pte);
    return 0;
}

static int change_pmd_range(pmd_t *pmdp, unsigned long addr, void *data)
{
    struct page_change_data *cdata = data;
    pmd_t pmd = READ_ONCE(*pmdp);

    pmd = clear_pmd_bit(pmd, cdata->clear_mask);
    pmd = set_pmd_bit(pmd, cdata->set_mask);

    set_pmd(pmdp, pmd);
    return 0;
}

// Adapted from mm/memory.c
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                  unsigned long addr, unsigned long end,
                  pte_fn_t fn, void *data, pgtbl_mod_mask *mask)
{
    pte_t *pte, *mapped_pte;
    int err = 0;
    spinlock_t *ptl;

    mapped_pte = pte = (mm == init_mm_ptr) ?
        pte_offset_kernel(pmd, addr) :
        pte_offset_map_lock(mm, pmd, addr, &ptl);

    BUG_ON(pmd_huge(*pmd));

    arch_enter_lazy_mmu_mode();

    if (fn) {
        do {
            if (!pte_none(*pte)) {
                err = fn(pte++, addr, data);
                if (err)
                    break;
            }
        } while (addr += PAGE_SIZE, addr != end);
    }

    *mask |= PGTBL_PTE_MODIFIED;

    arch_leave_lazy_mmu_mode();

    if (mm != init_mm_ptr)
        pte_unmap_unlock(mapped_pte, ptl);
    return err;
}

// Adapted from mm/memory.c
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
                  unsigned long addr, unsigned long end,
                  pte_fn_t fn_pte, pmd_fn_t fn_pmd, void *data,
                  pgtbl_mod_mask *mask)
{
    pmd_t *pmd;
    unsigned long next;
    int err = 0;

    BUG_ON(pud_huge(*pud));

    pmd = pmd_offset(pud, addr);
    do {
        next = pmd_addr_end(addr, end);
        if (pmd_none(*pmd))
            continue;

        if (pmd_leaf(*pmd)) {
            if (!fn_pmd || pmd_none(*pmd))
                continue;

            err = fn_pmd(pmd, addr, data);
            if (err)
                break;
        } else {
            if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd)))
                continue;

            err = apply_to_pte_range(mm, pmd, addr, next, fn_pte, data, mask);
            if (err)
                break;
        }
    } while (pmd++, addr = next, addr != end);

    return err;
}

// Adapted from mm/memory.c
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
                  unsigned long addr, unsigned long end,
                  pte_fn_t fn_pte, pmd_fn_t fn_pmd, void *data,
                  pgtbl_mod_mask *mask)
{
    pud_t *pud;
    unsigned long next;
    int err = 0;

    pud = pud_offset(p4d, addr);
    do {
        next = pud_addr_end(addr, end);
        if (pud_none(*pud))
            continue;

        if (WARN_ON_ONCE(pud_leaf(*pud)))
            return -EINVAL;
        if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud)))
            continue;

        err = apply_to_pmd_range(mm, pud, addr, next, fn_pte, fn_pmd, data, mask);
        if (err)
            break;
    } while (pud++, addr = next, addr != end);

    return err;
}

// Adapted from mm/memory.c
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
                  unsigned long addr, unsigned long end,
                  pte_fn_t fn_pte, pmd_fn_t fn_pmd, void *data,
                  pgtbl_mod_mask *mask)
{
    p4d_t *p4d;
    unsigned long next;
    int err = 0;

    p4d = p4d_offset(pgd, addr);
    do {
        next = p4d_addr_end(addr, end);
        if (p4d_none(*p4d))
            continue;

        if (WARN_ON_ONCE(p4d_leaf(*p4d)))
            return -EINVAL;
        if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d)))
            continue;

        err = apply_to_pud_range(mm, p4d, addr, next, fn_pte, fn_pmd, data, mask);
        if (err)
            break;
    } while (p4d++, addr = next, addr != end);

    return err;
}

// Adapted from mm/memory.c
static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
                 unsigned long size, pte_fn_t fn_pte, pmd_fn_t fn_pmd,
                 void *data)
{
    pgd_t *pgd;
    unsigned long start = addr, next;
    unsigned long end = addr + size;
    pgtbl_mod_mask mask = 0;
    int err = 0;

    if (WARN_ON(addr >= end))
        return -EINVAL;

    pgd = pgd_offset(mm, addr);
    do {
        next = pgd_addr_end(addr, end);
        if (pgd_none(*pgd))
            continue;

        if (WARN_ON_ONCE(pgd_leaf(*pgd)))
            return -EINVAL;
        if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd)))
            continue;

        err = apply_to_p4d_range(mm, pgd, addr, next, fn_pte, fn_pmd, data, &mask);
        if (err)
            break;
    } while (pgd++, addr = next, addr != end);

    if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
        arch_sync_kernel_mappings(start, start + size);

    return err;
}

// Adapted from arch/arm64/mm/pageattr.c.
static int __change_memory_common(unsigned long start, unsigned long size,
                  pgprot_t set_mask, pgprot_t clear_mask)
{
    struct page_change_data data;
    int ret;

    data.set_mask = set_mask;
    data.clear_mask = clear_mask;

    ret = __apply_to_page_range(init_mm_ptr, start, size, &change_page_range,
                    &change_pmd_range, &data);
    if (ret)
        pr_info("__apply_to_page_range() failed: %d\n", ret);

    flush_tlb_kernel_range(start, start + size);
    return ret;
}

// Simplified version of set_memory_rw() from arch/arm64/mm/pageattr.c.
static int set_page_rw(unsigned long addr)
{
    vm_unmap_aliases();
    return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_WRITE), __pgprot(PTE_RDONLY));
}

// Simplified version of set_memory_ro() from arch/arm64/mm/pageattr.c.
static int set_page_ro(unsigned long addr)
{
    vm_unmap_aliases();
    return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_RDONLY), __pgprot(PTE_WRITE));
}

/***** ACTUAL MODULE **********************************************************/

static long myread(const struct pt_regs *regs)
{
    pr_info("read() called\n");
    return original_read(regs);
}

typedef unsigned long (*kallsyms_lookup_name_t)(const char *);

static int __init modinit(void)
{
    struct kprobe kp = { .symbol_name = "kallsyms_lookup_name" };
    kallsyms_lookup_name_t kallsyms_lookup_name;
    int res;

    pr_info("init\n");

    // Workaround for kallsyms_lookup_name() not being exported: find it
    // using kprobes.
    res = register_kprobe(&kp);
    if (res != 0) {
        pr_err("register_kprobe() failed: %d\n", res);
        return res;
    }

    kallsyms_lookup_name = (kallsyms_lookup_name_t)kp.addr;
    unregister_kprobe(&kp);

    init_mm_ptr = (struct mm_struct *)kallsyms_lookup_name("init_mm");
    if (!init_mm_ptr) {
        pr_err("init_mm not found\n");
        return -ENOSYS;
    }

    syscall_table = (syscall_fn_t *)kallsyms_lookup_name("sys_call_table");
    if (!syscall_table) {
        pr_err("sys_call_table not found\n");
        return -ENOSYS;
    }

    original_read = syscall_table[__NR_read];

    res = set_page_rw((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_rw() failed: %d\n", res);
        return res;
    }

    syscall_table[__NR_read] = myread;

    res = set_page_ro((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_ro() failed: %d\n", res);
        return res;
    }

    pr_info("init done\n");
    return 0;
}

static void __exit modexit(void)
{
    int res;

    pr_info("exit\n");

    res = set_page_rw((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0) {
        pr_err("set_page_rw() failed: %d\n", res);
        return;
    }

    syscall_table[__NR_read] = original_read;

    res = set_page_ro((unsigned long)(syscall_table + __NR_read) & PAGE_MASK);
    if (res != 0)
        pr_err("set_page_ro() failed: %d\n", res);

    pr_info("goodbye\n");
}

module_init(modinit);
module_exit(modexit);
MODULE_VERSION("0.1");
MODULE_AUTHOR("Marco Bonelli");
MODULE_DESCRIPTION("Syscall hijack on arm64.");
MODULE_LICENSE("GPL");