Linux ramdump parser工具的Ramdump.py中定义了核心的数据结构Ramdump。这个类非常庞大,有几千行代码,定义了非常多的函数。我们需要了解这些接口并明白其原理,才能在后续的插件开发中得心应手!

故本章节讲解class Ramdump的核心函数,且只介绍一些非常常用的接口

常用接口

get_kernel_version

    def get_kernel_version(self):
        """Return the kernel version as a ``(major, minor, patch)`` tuple.

        On first use the ``linux_banner`` string is read through gdb, the
        version is parsed out of it and cached in ``self.kernel_version``;
        the raw banner is kept in ``self.linux_banner``.  Later calls return
        the cached tuple.

        Exits the process with status 1 when the banner cannot be read or
        no version can be extracted from it.
        """
        if self.kernel_version != (0, 0, 0):
            # Already parsed on an earlier call.
            return self.kernel_version

        vm_v = self.gdbmi.get_value_of_string('linux_banner')
        if vm_v is None:
            print_out_str('!!! Could not read linux_banner from vmlinux!')
            sys.exit(1)
        # Raw strings: '\d' and '\.' in a plain literal are invalid escape
        # sequences and trigger a SyntaxWarning on recent Python versions.
        v = re.search(r'Linux version (\d{0,2}\.\d{0,2}\.\d{0,3})', vm_v)
        if v is None:
            print_out_str('!!! Could not extract version info!')
            sys.exit(1)
        self.version = v.group(1)
        # The first pattern tolerates empty components; this one requires
        # three non-empty numeric fields.
        match = re.search(r'(\d+)\.(\d+)\.(\d+)', self.version)
        if match is None:
            print_out_str('!!! Could not extract version info! {0}'.format(self.version))
            sys.exit(1)
        self.version = tuple(map(int, match.groups()))
        self.kernel_version = self.version
        self.linux_banner = vm_v
        return self.kernel_version

通过调用ramdump实例的get_kernel_version函数可以得到kernel版本号

可以看到,真正获取版本号的是self.gdbmi.get_value_of_string函数,这是gdbmi库的标准接口

在ramdump初始化时被初始化,实际上就是通过gdb解析vmlinux

有点类似 strings工具的执行结果

strings vmlinux | grep "Linux version"

这个get_kernel_version还是非常有用的:不同kernel版本之间,一些内核结构会发生变化,所以解析时需要根据内核版本进行区分,比如

            if self.get_kernel_version() >= (5, 4):
                self.page_offset = -(1 << self.va_bits) % (1 << 64)
                if self.address_of('__start_init_task') is not None:
                    self.thread_size = self.address_of('__end_init_task') - self.address_of('__start_init_task')
                else:
                    self.thread_size = self.address_of('__end_init_stack') - self.address_of('__start_init_stack')
            else:
                self.page_offset = 0xffffffc000000000
                self.thread_size = 16384

address_of

    def address_of(self, symbol):
        """Returns the address of a symbol.

        Results are cached per KASLR offset in
        ``self.cached_data['addressof']``.  On a cache miss the symbol is
        looked up through ``self.gdbmi`` and, if that raises
        ``GdbMIException`` and ``self.hyp`` is set, through
        ``self.gdbmi_hyp``.  Failed lookups are not cached.

        :param symbol: name of the symbol.
        :type symbol: str

        :return: address value, or ``None`` when the symbol cannot be
            resolved.

        Example:

        >>> hex(dump.address_of('linux_banner'))
        '0xffffffc000c7a0a8'
        """
        cached_data = self.cached_data['addressof']
        kaslr_tmp = self.get_kaslr_offset()
        # One sub-cache per KASLR offset; created on first use.
        per_kaslr = cached_data.setdefault(kaslr_tmp, {})
        if symbol in per_kaslr:
            return per_kaslr[symbol]

        try:
            r = self.gdbmi.address_of(symbol)
            per_kaslr[symbol] = r
            return r
        except gdbmi.GdbMIException:
            if self.hyp:
                try:
                    r = self.gdbmi_hyp.address_of(symbol)
                    per_kaslr[symbol] = r
                    return r
                except gdbmi.GdbMIException:
                    pass
        return None

这个函数的功能就是获取指定的symbol的地址。那如何使用呢?

mod_list = self.address_of('modules')
_text = self.address_of('_text')
per_cpu_offset_addr = self.address_of('__per_cpu_offset')
cache_base_addr = self.ramdump.address_of('l2_dump')
mem_dump_data = self.ramdump.address_of('mem_dump_data')

field_offset

    def field_offset(self, the_type, field):
        """Gets the offset of a field from the base of its containing struct.

        This can be useful when reading struct fields, although you should
        consider using :func:`~read_structure_field` if
        you're reading a word-sized value.

        Results are cached per type in ``self.cached_data['fieldoffset']``.
        On a cache miss the offset is queried through ``self.gdbmi`` and,
        if that raises ``GdbMIException`` and ``self.hyp`` is set, through
        ``self.gdbmi_hyp``.  Failed lookups are not cached.

        :param the_type: type name, e.g. ``'struct device'``.
        :param field: member name within ``the_type``.

        :return: the offset, or ``None`` when it cannot be determined.

        Example:

        >>> dump.field_offset('struct device', 'bus')
        168
        """
        cached_data = self.cached_data['fieldoffset']
        # One sub-cache per type; created on first use.
        per_type = cached_data.setdefault(the_type, {})
        if field in per_type:
            return per_type[field]

        try:
            r = self.gdbmi.field_offset(the_type, field)
            per_type[field] = r
            return r
        except gdbmi.GdbMIException:
            if self.hyp:
                try:
                    r = self.gdbmi_hyp.field_offset(the_type, field)
                    per_type[field] = r
                    return r
                except gdbmi.GdbMIException:
                    pass
        return None

函数功能:返回结构体the_type的成员field的offset

用法:

        next_offset = self.field_offset('struct list_head', 'next')
        list_offset = self.field_offset('struct module', 'list')
        name_offset = self.field_offset('struct module', 'name')

container_of

    def container_of(self, ptr, the_type, member):
        """Like ``container_of`` in the kernel.

        Delegates to ``self.gdbmi``; when that raises ``GdbMIException`` and
        ``self.hyp`` is set, retries through ``self.gdbmi_hyp``.  Returns
        ``None`` when no lookup succeeds.
        """
        try:
            result = self.gdbmi.container_of(ptr, the_type, member)
        except gdbmi.GdbMIException:
            if not self.hyp:
                return None
            try:
                result = self.gdbmi_hyp.container_of(ptr, the_type, member)
            except gdbmi.GdbMIException:
                return None
        return result

函数功能:和内核container_of类似

read_cstring

    def read_cstring(self, addr_or_name, max_length=100, virtual=True,
                     cpu=None, allow_elf=False):
        """Reads a C string.

        :param addr_or_name: address (or a name accepted by
            ``resolve_virt`` when ``cpu`` is given) to read from.
        :param max_length: maximum number of bytes to read.
        :param virtual: when true ``addr_or_name`` is translated with
            ``virt_to_phys``; otherwise it is used as a physical address.
        :param cpu: when not ``None`` the per-cpu offset of that cpu is
            added to the resolved address before translation.
        :param allow_elf: when true and the virtual address cannot be
            translated, fall back to reading through ``self.gdbmi``.

        :return: the ASCII-decoded text up to the first NUL byte (undecodable
            bytes are dropped), or ``None`` when nothing could be read.
        """
        addr = addr_or_name
        s = None
        if virtual:
            if cpu is not None:
                pcpu_offset = self.per_cpu_offset(cpu)
                addr_or_name = self.resolve_virt(addr_or_name)
                # Apply the per-cpu offset exactly once; it used to be added
                # twice, which pointed the read at the wrong address.
                addr_or_name += pcpu_offset
            addr = self.virt_to_phys(addr_or_name)
            if allow_elf and addr is None:
                s = self.gdbmi.read_memory(addr_or_name, '{}+{}'.format(addr_or_name, max_length))
        if not s:
            s = self.read_physical(addr, max_length)
        if s is None:
            return None
        return s.decode('ascii', 'ignore').split('\0')[0]

函数功能:从某地址读取字符串

用法:

self.ramdump.read_cstring(pointer)
mod_name = self.read_cstring(mod + name_offset)

read_structure_field

    def read_structure_field(self, addr_or_name, struct_name, field, virtual=True):
        """Reads a 1, 2, 4 or 8 byte field from a structure.

        Returns ``None`` when the address or the field size cannot be
        resolved, or when the field has any other size.
        """
        field_size = self.sizeof("(({0} *)0)->{1}".format(struct_name, field))
        base = self.resolve_virt(addr_or_name)
        if base is None or field_size is None:
            return None

        field_addr = base + self.field_offset(struct_name, field)
        # Map the field width to the name of the matching reader method.
        reader_by_width = {
            1: 'read_byte',
            2: 'read_u16',
            4: 'read_u32',
            8: 'read_u64',
        }
        reader_name = reader_by_width.get(field_size)
        if reader_name is None:
            return None
        return getattr(self, reader_name)(field_addr, virtual)

函数功能:从一个结构体里读取一个成员(普通变量)

用法:

ver = int(self.read_structure_field(socinfo, 'struct socinfo', 'ver') or 0)
serial_num = int(self.read_structure_field(socinfo, 'struct socinfo', 'serial_num') or 0)

read_structure_cstring

    def read_structure_cstring(self, addr_or_name, struct_name, field,
                               max_length=100):
        """Reads the C string a structure field points to.

        The field is dereferenced as a pointer first, so it must be a
        ``char *`` member rather than an embedded ``char []`` array.
        """
        struct_base = self.resolve_virt(addr_or_name)
        field_addr = struct_base + self.field_offset(struct_name, field)
        string_ptr = self.read_pointer(field_addr)
        return self.read_cstring(string_ptr, max_length)

函数功能:从结构体中读取一个成员(char *类型)

用法:

self.ramdump.read_structure_cstring(zram_addr, 'struct zram', 'comp_algs', self.CRYPTO_MAX_ALG_NAME)

关于接口部分还有很多,具体可以自己查看,这里只介绍这几种常用的

read_elf_memory

    def read_elf_memory(self, addr, length, temp_file):
        """Read memory through ``self.gdbmi.read_elf_memory`` as a C string.

        The arguments are forwarded unchanged.  Returns the ASCII-decoded
        text up to the first NUL byte (undecodable bytes are dropped), or
        ``None`` when the underlying read returns ``None``.
        """
        raw = self.gdbmi.read_elf_memory(addr, length, temp_file)
        if raw is None:
            return raw
        text = raw.decode('ascii', 'ignore')
        head, _, _ = text.partition('\0')
        return head

主要用于从 ELF 文件中提取字符串信息。

Ramdump类的初始化

在ramparse.py中我们创建了Ramdump类的实例,所以会执行Ramdump的初始化代码。下面分开描述初始化流程做了哪些事情。这里忽略Ramdump类里一些成员的初始化赋值,以及一些关于minidump/fulldump解析区分的代码

解析Kconfig

        self.config = []
        self.config_dict = {}
        if not self.get_config():
            print_out_str('!!! Could not get saved configuration')
            print_out_str(
                '!!! This is really bad and probably indicates RAM corruption')
            print_out_str('!!! Some features may be disabled!')

        # extract kernel's configuration to kconfig.txt
        saved_config = self.open_file('kconfig.txt')
        for l in self.config:
            saved_config.write(l + '\n')

        saved_config.close()

通过调用get_config函数填充self.config列表,然后根据列表内容写入到kconfig.txt

    def get_config(self):
        """Extract the kernel configuration embedded in vmlinux.

        Locates ``kernel_config_data``, checks the 8-byte ``IKCFG_ST`` magic,
        dumps the gzip-compressed config that follows it into a temporary
        file under ``self.outdir`` and decompresses it.  Every line is
        appended (right-stripped) to ``self.config``; every non-blank,
        non-comment ``NAME=value`` line is also stored in
        ``self.config_dict``.

        :return: ``True`` on success, ``False`` when the symbol is missing,
            the magic does not match or the data cannot be decompressed.
        """
        kconfig_addr = self.address_of('kernel_config_data')
        if kconfig_addr is None:
            return False
        if self.get_kernel_version() > (5, 0, 0):
            kconfig_addr_end = self.address_of('kernel_config_data_end')
            if kconfig_addr_end is None:
                return False
            kconfig_size = kconfig_addr_end - kconfig_addr
            # magic is 8 bytes before kconfig_addr and data
            # starts at kconfig_addr for kernel > 5.0.0
            kconfig_addr = kconfig_addr - 8
        else:
            kconfig_size = self.sizeof('kernel_config_data')
            # size includes magic, offset from it
            kconfig_size = kconfig_size - 16 - 1

        # Scratch file the ELF reads are dumped into; always removed below.
        zconfig = os.path.join(self.outdir, "elf_temp.txt")
        try:
            # kconfig data starts with magic 8 byte string, go past that
            # NOTE(review): the second argument is passed as an end address
            # (start + size) although the wrapper names it ``length``.
            with open(zconfig, 'wb+') as temp_file:
                magic = self.read_elf_memory(kconfig_addr, kconfig_addr + 8,
                                             temp_file)
            if magic != 'IKCFG_ST':
                return False

            kconfig_addr = kconfig_addr + 8
            with open(zconfig, 'wb+') as temp_file:
                self.read_elf_memory(kconfig_addr, kconfig_size + kconfig_addr,
                                     temp_file)

            # What was dumped is expected to be a gzip stream.
            try:
                with gzip.open(zconfig, 'rt') as zconfig_in:
                    lines = zconfig_in.readlines()
            except Exception:
                # Corrupt or truncated data: report failure instead of
                # swallowing KeyboardInterrupt/SystemExit with a bare except.
                return False
        finally:
            if os.path.exists(zconfig):
                os.remove(zconfig)

        for line in lines:
            self.config.append(line.rstrip())
            if line.startswith('#') or line.strip() == '':
                continue
            cfg, sep, val = line.partition('=')
            if not sep:
                # Not a NAME=value line; nothing to record in the dict.
                continue
            self.config_dict[cfg] = val.strip()
        return True

这个函数执行后会产生kconfig.txt以及self.config_dict(如果需要查询某个config配置项,就很方便了)

也提供了现成的接口可以调用,也就是get_config_val

get_config_val

    def get_config_val(self, config):
        """Look up a kernel config option in ``self.config_dict``.

        Returns the stored value, or ``None`` when the option is not present.

        Example:

        >>> dump.get_config_val("CONFIG_ARM64_VA_BITS")
        '39'
        """
        options = self.config_dict
        return options.get(config)

函数功能:获取内核config项的值

用法:

self.pgtable_levels = int(self.get_config_val("CONFIG_PGTABLE_LEVELS"))

解析出kernel symbols table

这依赖于是否传参--dump_krnl_sym_tbl,默认这个是false,如果需要解析这个,可以加上这个参数

        if self.dump_kernel_symbol_table:
            self.dump_mod_sym_table('vmlinux', self.lookup_table)

    def dump_mod_sym_table(self, mod_name, sym_lookup_tbl):
        """Write a symbol lookup table to ``sym_tbl_<mod_name>.txt``.

        Each entry is written as ``0x<address> <name>`` on its own line,
        using the first two elements of the entry.
        """
        out = self.open_file('sym_tbl_'+mod_name+'.txt')
        for entry in sym_lookup_tbl:
            address, name = entry[0], entry[1]
            out.write('0x{0:x} {1}\n'.format(address, name))
        out.close()

vmlinux和dump检测是否匹配

        if not self.match_version():
            print_out_str('!!! Could not get the Linux version!')
            print_out_str(
                '!!! Your vmlinux is probably wrong for these dumps')
            print_out_str('!!! Exiting now')
            sys.exit(1)


    def match_version(self):
        """Check that the Linux banner in the dump matches the one from vmlinux.

        Reads the banner from the dump at the physical address of
        ``linux_banner`` and tests whether ``self.linux_banner`` occurs in
        it.  Returns ``True`` on a match and ``False`` otherwise, including
        when the symbol or the banner cannot be found.
        """
        banner_addr = self.address_of('linux_banner')
        if banner_addr is None:
            print_out_str('!!! linux_banner sym not found in vmlinux')
            return False

        # Translate the symbol's virtual address and read from the dump by
        # physical address (virtual=False).
        banner_phys = self.kernel_virt_to_phys(banner_addr)
        expected_len = len(self.linux_banner)
        b = self.read_cstring(banner_phys, expected_len, False)
        if b is None:
            print_out_str('!!! Banner not found in dumps!')
            return False

        print_out_str('Linux Banner: ' + b.rstrip())
        matched = str(self.linux_banner) in str(b)
        if matched:
            print_out_str("Linux banner from vmlinux = %s" % self.linux_banner)
            print_out_str("Linux banner from dump = %s" % b)
        else:
            print_out_str("Expected Linux banner = %s" % self.linux_banner)
            print_out_str("Linux banner in Dumps = %s" % b)
        return matched

modules symbols加载

        if self.module_table.sym_paths_exist():
            self.setup_module_symbols()
            self.gdbmi.setup_module_table(self.module_table)
            if self.dump_global_symbol_table:
                self.dump_global_symbol_lookup_table()

获知modules symbols的路径

modules的symbols的路径是通过传参得到的,也就是-m参数。

        # Save all paths given from --mod_path option. These will be searched for .ko.unstripped files
        if options.mod_path_list:
            for path in options.mod_path_list:
                self.module_table.add_sym_path(path)

    def add_sym_path(self, sym_path):
        """Register a directory to search for module symbol files.

        Returns ``True`` when ``sym_path`` exists and was appended to
        ``self.sym_path_list``; otherwise reports the problem and returns
        ``False``.
        """
        if sym_path is None:
            print_out_str('sym_path: not specified!')
            return False
        if os.path.exists(sym_path):
            self.sym_path_list.append(sym_path)
            return True
        print_out_str('sym_path: ' + sym_path + ' not valid or directory doesn\'t exist')
        return False

执行后,self.sym_path_list列表中就保存了所有的ko symbols的路径

设置module symbols

    def setup_module_symbols(self):
        """Run the module-symbol pipeline in order.

        Scans the symbol paths, retrieves the module list (minidump or full
        dump variant), parses each module's symbols and finally merges them
        into the global lookup table.
        """
        self.traverse_module()
        retrieve = self.retrieve_minidump_modules if self.minidump else self.retrieve_modules
        retrieve()
        self.parse_module_symbols()
        self.add_symbols_to_global_lookup_table()

遍历模块填充模块相关结构体

    def traverse_module(self):
        """Index the module object files found under every symbol path.

        Each ``*.ko.unstripped`` / ``*.ko`` file visited by
        ``self.walk_depth`` is recorded in ``self.ko_file_dict`` under its
        base name (with ``-`` replaced by ``_``), and the name is appended
        to ``self.ko_file_names``.
        """
        def on_file(file):
            # Strip the recognised suffix; anything else is not a module.
            for suffix in ('.ko.unstripped', '.ko'):
                if file.endswith(suffix):
                    name = file[:-len(suffix)]
                    break
            else:
                return
            name = os.path.basename(name).replace("-", "_")

            # Prefer .ko.unstripped
            already_unstripped = self.ko_file_dict.get(name, '').endswith('.ko.unstripped')
            if already_unstripped and file.endswith('.ko'):
                return

            # Prefer ko with debug info
            if name in self.ko_file_dict and self.has_debug_info(self.ko_file_dict.get(name)):
                return

            self.ko_file_dict[name] = file
            self.ko_file_names.append(name)

        for path in self.module_table.sym_path_list:
            self.walk_depth(path, on_file)

这个函数目的就是将这些module的symbols填充到self.ko_file_dict字典以及self.ko_file_names列表中

检索模块

这部分逻辑比较复杂,请看代码注释

    def retrieve_modules(self):
        """Walk the kernel ``modules`` list in the dump and record live modules.

        For every ``struct module`` whose state is ``MODULE_STATE_LIVE`` a
        ``module_table_entry`` is filled with the module name, its core base
        address, its kallsyms address and the addresses of a fixed set of
        sections (plus ``.data..percpu`` when CONFIG_SMP is defined and the
        module has per-cpu data), then added to ``self.module_table``.
        Modules in any other state, or whose section attribute group is not
        named ``sections``, are reported and skipped.
        """
        # Address of the 'modules' symbol, i.e. the head of the module list.
        mod_list = self.address_of('modules')
        next_offset = self.field_offset('struct list_head', 'next')
        list_offset = self.field_offset('struct module', 'list')
        name_offset = self.field_offset('struct module', 'name')
        # Loop-invariant, so query the config once.
        smp = self.is_config_defined('CONFIG_SMP')
        if smp:
            percpu_offset = self.field_offset('struct module', 'percpu')
            percpu_size_offset = self.field_offset('struct module', 'percpu_size')

        # The member holding the module's core base address differs between
        # kernel versions.
        if self.kernel_version >= (6, 4, 0):
            module_core_offset = self.field_offset('struct module', 'mem[0].base')
        elif self.kernel_version > (4, 9, 0):
            module_core_offset = self.field_offset('struct module', 'core_layout.base')
        else:
            module_core_offset = self.field_offset('struct module', 'module_core')

        # The section name lives behind 'battr' when that member exists,
        # otherwise directly in 'name'.
        if self.field_offset('struct module_sect_attr', 'battr') is not None:
            sect_name_offset = self.field_offset('struct module_sect_attr', 'battr') + self.field_offset('struct bin_attribute', 'attr') + self.field_offset('struct attribute', 'name')
        else:
            sect_name_offset = self.field_offset('struct module_sect_attr', 'name')

        kallsyms_offset = self.field_offset('struct module', 'kallsyms')
        sect_addr_offset = self.field_offset('struct module_sect_attr', 'address')
        nsections_offset = self.field_offset('struct module_sect_attrs', 'nsections')
        section_attrs_offset = self.field_offset('struct module_sect_attrs', 'attrs')
        section_attr_size = self.sizeof('struct module_sect_attr')
        mod_sect_attrs_offset = self.field_offset('struct module', 'sect_attrs')
        mod_state_offset = self.field_offset('struct module', 'state')
        mod_attr_grp_name_offset = self.field_offset('struct module_sect_attrs', 'grp') + self.field_offset('struct attribute_group', 'name')
        module_states = self.gdbmi.get_enum_lookup_table('module_state', 5)

        next_list_ent = self.read_pointer(mod_list + next_offset)
        # Each list entry is the embedded list_head; subtracting its offset
        # yields the address of the containing struct module.
        while next_list_ent and next_list_ent != mod_list:
            module = next_list_ent - list_offset

            mod_tbl_ent = module_table.module_table_entry()
            mod_tbl_ent.name = self.read_cstring(module + name_offset)
            state = self.read_u32(module + mod_state_offset)
            # '>=' (not '>'): state == len(module_states) is already out of
            # range and would raise IndexError on the lookup below.
            if mod_tbl_ent.name is None or state is None or state >= len(module_states) or module_states[state] != 'MODULE_STATE_LIVE':
                msg = 'module state @{:x}'.format(module)
                if mod_tbl_ent.name:
                    msg += ' [{}]'.format(mod_tbl_ent.name)
                msg += ' is {}'.format(state)
                if state is not None and state < len(module_states):
                    msg += '({})'.format(module_states[state])
                print_out_str(msg)
                next_list_ent = self.read_pointer(next_list_ent + next_offset)
                continue
            mod_tbl_ent.module_offset = self.read_pointer(module + module_core_offset)
            if mod_tbl_ent.module_offset is None:
                mod_tbl_ent.module_offset = 0
            mod_tbl_ent.kallsyms_addr = self.read_pointer(module + kallsyms_offset)
            # Loop through sect_attrs
            mod_tbl_ent.section_offsets = {}
            mod_sect_attrs = self.read_pointer(module + mod_sect_attrs_offset)  # module.sect_attrs
            # An unreadable sect_attrs pointer is treated like a wrong group
            # name instead of raising TypeError on the address arithmetic.
            grp_name = None
            if mod_sect_attrs is not None:
                grp_name = self.read_cstring(self.read_pointer(mod_sect_attrs + mod_attr_grp_name_offset))
            if grp_name != 'sections':
                # Observed some ramdumps did not have proper attribute set up yet when module is being loaded.
                # "LIVE" state check not good enough, so add one more sanity check
                print_out_str('Unexpected variation in module section group name, skipping loading sections for {}'.format(mod_tbl_ent.name))
                next_list_ent = self.read_pointer(next_list_ent + next_offset)
                continue
            # An unreadable section count is treated as zero sections.
            nsections = self.read_u32(mod_sect_attrs + nsections_offset) or 0
            for i in range(0, nsections):
                # attr_ptr = module.sect_attrs.attrs[i]
                attr_ptr = mod_sect_attrs + section_attrs_offset + (i * section_attr_size)
                # sect_name = attr_ptr.battr.attr.name (for 5.4+)
                sect_name = self.read_cstring(self.read_pointer(attr_ptr + sect_name_offset))
                # sect_addr = attr_ptr.address
                sect_addr = self.read_word(attr_ptr + sect_addr_offset)
                # Only a fixed set of sections is recorded, see
                # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/gdb/linux/symbols.py?h=v5.14#n102
                if sect_name not in ['.data', '.data..read_mostly', '.rodata', '.bss',
                                     '.text', '.text.bss', '.text.hot', '.text.unlikely']:
                    continue
                mod_tbl_ent.section_offsets[sect_name] = sect_addr
            if smp:
                percpu_size = self.read_u32(module + percpu_size_offset)
                if percpu_size != 0:
                    percpu_pointer = self.read_pointer(module + percpu_offset)
                    mod_tbl_ent.section_offsets['.data..percpu'] = percpu_pointer
            self.module_table.add_entry(mod_tbl_ent)

            next_list_ent = self.read_pointer(next_list_ent + next_offset)

主要任务:

  1. 遍历模块链表

    • 从 modules 链表头开始,遍历系统中所有加载的内核模块。

  2. 筛选有效模块

    • 仅处理状态为 MODULE_STATE_LIVE 的模块,跳过还未加载完成或已卸载的模块。

  3. 提取模块信息

    • 模块名称(name)

    • 模块加载到内存中的地址(module_core / core_layout.base 等)

    • 模块符号地址(kallsyms)

  4. 提取模块的节区(section)信息

    • .text, .data, .rodata, .bss 等常见节区的地址

    • 如果启用了 SMP(多核),还会提取 .data..percpu 的地址

  5. 保存模块信息到全局模块表中

    self.module_table 里现在就会存储所有模块的信息

模块解析

    def parse_module_symbols(self):
        """Parse the symbols of every named entry in ``self.module_table``.

        Entries whose name is ``None`` are reported and skipped.
        """
        for entry in self.module_table.module_table:
            if entry.name is not None:
                self.parse_symbols_of_one_module(entry, self.ko_file_dict)
                continue
            print_out_str('!! Object name not extracted properly..checking next!!')

    def parse_symbols_of_one_module(self, mod_tbl_ent, ko_file_dict):
        """Collect the symbols of one module into ``mod_tbl_ent``.

        The module's object file is looked up in ``ko_file_dict`` and set as
        the entry's symbol path.  Then, depending on the configuration:

        * CONFIG_KALLSYMS defined and not a minidump: the module's
          ``struct mod_kallsyms`` is read from the dump and every symbol
          whose ELF type is ``STT_FUNC`` is appended to
          ``mod_tbl_ent.kallsyms_table``.
        * otherwise: ``nm -n`` is run on the object file and every symbol,
          rebased by ``mod_tbl_ent.module_offset``, is appended to
          ``mod_tbl_ent.sym_lookup_table``.

        :param mod_tbl_ent: module table entry to fill in.
        :param ko_file_dict: mapping of module name to object file path; it
            may be modified (a key is renamed to the module's exact name).
        """
        # Substring match: any key that contains the module name qualifies.
        name_index = [s for s in ko_file_dict.keys() if mod_tbl_ent.name in s]
        if len(name_index) == 0:
            print_out_str('!! Object not found for {}'.format(mod_tbl_ent.name))
            return

        # No exact key: re-key the first substring match under the module's
        # own name so the lookup below succeeds.
        if mod_tbl_ent.name not in ko_file_dict and name_index[0] in ko_file_dict:
            temp_data = ko_file_dict[name_index[0]]
            del ko_file_dict[name_index[0]]
            ko_file_dict[mod_tbl_ent.name] = temp_data
        if not mod_tbl_ent.set_sym_path(ko_file_dict[mod_tbl_ent.name]):
            return

        if self.is_config_defined("CONFIG_KALLSYMS") and not self.minidump:
            symtab_offset = self.field_offset('struct mod_kallsyms', 'symtab')
            num_symtab_offset = self.field_offset('struct mod_kallsyms', 'num_symtab')
            strtab_offset = self.field_offset('struct mod_kallsyms', 'strtab')

            # The ELF symbol record layout depends on the target word size.
            if self.arm64:
                sym_struct_name = 'struct elf64_sym'
                sym_struct_size = self.sizeof(sym_struct_name)
            else:
                sym_struct_name = 'struct elf32_sym'
                sym_struct_size = self.sizeof(sym_struct_name)

            st_info_offset = self.field_offset(sym_struct_name, 'st_info')
            symtab = self.read_pointer(mod_tbl_ent.kallsyms_addr + symtab_offset)
            # NOTE(review): num_symtab is read with read_pointer like the two
            # pointer members - confirm the field really is pointer-sized.
            num_symtab = self.read_pointer(mod_tbl_ent.kallsyms_addr + num_symtab_offset)
            strtab = self.read_pointer(mod_tbl_ent.kallsyms_addr + strtab_offset)

            if symtab is None or num_symtab is None or strtab is None:
                return

            KSYM_NAME_LEN = 128
            for i in range(0, num_symtab):
                # Address of the i-th symbol record in the module's symtab.
                elf_sym = symtab + sym_struct_size * i
                st_value = self.read_structure_field(elf_sym, sym_struct_name, 'st_value')
                st_info = self.read_byte(elf_sym + st_info_offset)
                sym_type = chr(st_info)
                st_name = self.read_structure_field(elf_sym, sym_struct_name, 'st_name')
                sym_addr = st_value
                # st_name is used as an offset into the string table.
                sym_name = self.read_cstring(strtab + st_name, KSYM_NAME_LEN)
                st_shndx = self.read_structure_field(elf_sym, sym_struct_name, 'st_shndx')
                st_size = self.read_structure_field(elf_sym, sym_struct_name, 'st_size')

                ###
                # FORMAT of record:
                # sym_addr, syn_name[mod_name], sym_type, idx_elf_sym, st_name, st_shndx, st_size
                ###
                if (sym_name is None or mod_tbl_ent.name is None):
                    continue
                """
                see include/uapi/linux/elf.h
                #define STT_FUNC    2
                ...
                #define ELF_ST_TYPE(x)		((x) & 0xf)
                """
                if st_info & 0xf == 2:
                    # only add FUNC type symbols to avoid built-in symbols
                    # being treated as belonging to a particular kernel module
                    mod_tbl_ent.kallsyms_table.append(
                        (sym_addr, sym_name + '[' + mod_tbl_ent.name + ']', sym_type, i,
                         st_name, st_shndx, st_size,sym_name))
            # Tuples sort by their first element first, i.e. by sym_addr.
            mod_tbl_ent.kallsyms_table.sort()
            if self.dump_module_kallsyms:
                self.dump_mod_kallsyms_sym_table(mod_tbl_ent.name, mod_tbl_ent.kallsyms_table)
        else:
            # No kallsyms available: list the object file's symbols with
            # 'nm -n' instead.
            args = [self.nm_path, '-n', mod_tbl_ent.get_sym_path()]
            p = subprocess.run(args, stdout=subprocess.PIPE)
            symbols = p.stdout.decode().splitlines()
            for line in symbols:
                s = line.split(' ')
                # Only lines that split into exactly three fields are used:
                # s[0] is parsed as a hex address and s[2] is the name.
                if len(s) == 3:
                    mod_tbl_ent.sym_lookup_table.append(
                        (int(s[0], 16) + mod_tbl_ent.module_offset,
                        s[2].rstrip() + '[' + mod_tbl_ent.name + ']'))
            mod_tbl_ent.sym_lookup_table.sort()
            if self.dump_module_symbol_table:
                self.dump_mod_sym_table(mod_tbl_ent.name, mod_tbl_ent.sym_lookup_table)
  • mod_tbl_ent: 表示一个模块的信息对象,之前已经通过 retrieve_modules() 构建好了,里面有模块名、内存地址、kallsyms 地址等。

  • ko_file_dict: 一个 .ko 文件名到路径或内容的映射字典(模块名或文件路径 -> .ko 文件路径),用于辅助提取符号。

parse_symbols_of_one_module() 会根据提取方式,把模块的符号保存到 mod_tbl_ent 的 kallsyms_table 或 sym_lookup_table 字段中(kallsyms 方式只保存函数类型的符号)

输出模块 symbols kallsyms

这依赖于是否传参--dump_mod_kallsyms,默认这个是false,如果需要解析这个,可以加上这个参数

    def dump_mod_kallsyms_sym_table(self, mod_name, mod_kallsyms_table):
        """Write a module's kallsyms table to ``sym_tbl_kallsyms_<mod_name>.txt``.

        The file starts with a title line and a column header, followed by
        one formatted line per table record.
        """
        header_fmt = '{0: >18} {1} {2: >64} {3} {4} {5} {6}\n'
        record_fmt = '0x{0:0>16x} {1: >8} {2: >64} {3: >11} {4: >7} {5: >8} {6: >7}\n'
        out = self.open_file('sym_tbl_kallsyms_'+mod_name+'.txt')
        out.write('KALLSYMS symbol lookup table['+mod_name+']\n')
        columns = ('sym_addr', 'sym_type', 'syn_name[mod_name]', 'idx_elf_sym',
                   'st_name', 'st_shndx', 'st_size')
        out.write(header_fmt.format(*columns))
        for record in mod_kallsyms_table:
            # Records are stored as (addr, name, type, ...); the output puts
            # the type column before the name column.
            sym_addr = record[0]
            sym_name = record[1]
            sym_type = record[2]
            elf_index = record[3]
            name_offset = hex(record[4])
            section_index = record[5]
            sym_size = record[6]
            out.write(record_fmt.format(sym_addr, sym_type, sym_name, elf_index,
                                        name_offset, section_index, sym_size))
        out.close()

`

加载symbols

        if self.module_table.sym_paths_exist():
             # ...
            self.gdbmi.setup_module_table(self.module_table)   # 调用gdb加载symbols

    def setup_module_table(self, module_table):
        """Load the symbol file of every module in ``module_table`` into gdb.

        For each module that has a symbol path an ``add-symbol-file``
        command is built and executed through ``self._run``.  All addresses
        passed to gdb have ``self.kaslr_offset`` subtracted.
        """
        self.mod_table = module_table
        for mod in self.mod_table.module_table:
            if not mod.get_sym_path():
                continue
            # Backslashes in the path are doubled for the gdb command line.
            escaped_path = mod.get_sym_path().replace('\\', '\\\\')
            parts = ['add-symbol-file', escaped_path]
            if ".text" not in mod.section_offsets.keys():
                # No .text section recorded: pass the module base address.
                parts.append('0x{:x}'.format(mod.module_offset - self.kaslr_offset))
            for segment, offset in mod.section_offsets.items():
                parts.extend(['-s', segment, '0x{:x}'.format(offset - self.kaslr_offset)])
            command = ' '.join(parts)
            self._run(command)

输出global_symbol_table

这依赖于是否传参--dump_glb_sym_tbl,默认这个是false,如果需要解析这个,可以加上这个参数

            if self.dump_global_symbol_table:
                self.dump_global_symbol_lookup_table()

    def dump_global_symbol_lookup_table(self):
        """Write ``self.lookup_table`` to ``sym_table.txt``.

        Each entry is written as ``0x<address> <name>`` on its own line,
        using the first two elements of the entry.
        """
        out = self.open_file('sym_table.txt')
        for entry in self.lookup_table:
            address, name = entry[0], entry[1]
            out.write('0x{0:x} {1}\n'.format(address, name))
        out.close()

至此,在ramparse.py中的Ramdump基类的初始化动作完成

    dump = RamDump(options, nm_path, gdb_path, objdump_path,gdb_ndk_path)

下面开始将会分析每一个插件的实现逻辑,从插件解析dump的逻辑角度去查看内核的一些知识点