Linux ramdump parser工具的Ramdump.py中定义了核心类Ramdump。这个类非常庞大,有几千行代码,定义了非常多的函数接口;我们需要了解这些接口、明白其原理,才能在后续的插件开发中得心应手!
故本章节讲解class Ramdump的核心函数,且只介绍一些非常常用的接口
常用接口
get_kernel_version
def get_kernel_version(self):
    """Return the kernel version as a ``(major, minor, patch)`` tuple of ints.

    The version is parsed from the ``linux_banner`` string that gdb reads
    out of vmlinux. The result is cached in ``self.kernel_version``; on the
    first successful call ``self.version`` and ``self.linux_banner`` are
    populated as well.

    The process exits with status 1 when the banner cannot be read or no
    version can be extracted from it.
    """
    # (0, 0, 0) is the "not parsed yet" marker.
    if self.kernel_version != (0, 0, 0):
        return self.kernel_version

    banner = self.gdbmi.get_value_of_string('linux_banner')
    if banner is None:
        print_out_str('!!! Could not read linux_banner from vmlinux!')
        sys.exit(1)

    # Raw strings: '\d' inside a normal literal is an invalid escape sequence.
    found = re.search(r'Linux version (\d{0,2}\.\d{0,2}\.\d{0,3})', banner)
    if found is None:
        print_out_str('!!! Could not extract version info!')
        sys.exit(1)

    self.version = found.group(1)
    # The first pattern also accepts empty components (e.g. '..'), so the
    # captured text is validated again before converting it to integers.
    parts = re.search(r'(\d+)\.(\d+)\.(\d+)', self.version)
    if parts is None:
        print_out_str('!!! Could not extract version info! {0}'.format(self.version))
        sys.exit(1)

    self.version = tuple(map(int, parts.groups()))
    self.kernel_version = self.version
    self.linux_banner = banner
    return self.kernel_version
通过调用ramdump实例的get_kernel_version函数可以得到kernel版本号
可以看到,实际获取版本号的函数是self.gdbmi.get_value_of_string
,这个是gdbmi库的标准接口
在ramdump初始化时被初始化,实际上就是通过gdb解析vmlinux
有点类似 strings
工具的执行结果
strings vmlinux | grep "Linux version"
这个get_kernel_version
还是非常有用的:不同kernel版本之间,一些内核结构会发生变化,所以解析时也需要通过内核版本进行区分,比如
if self.get_kernel_version() >= (5, 4):
self.page_offset = -(1 << self.va_bits) % (1 << 64)
if self.address_of('__start_init_task') is not None:
self.thread_size = self.address_of('__end_init_task') - self.address_of('__start_init_task')
else:
self.thread_size = self.address_of('__end_init_stack') - self.address_of('__start_init_stack')
else:
self.page_offset = 0xffffffc000000000
self.thread_size = 16384
address_of
def address_of(self, symbol):
    """Returns the address of a symbol.

    Successful lookups are cached per KASLR offset in
    ``self.cached_data['addressof']``. The main gdb session is asked first;
    when that raises ``GdbMIException`` and ``self.hyp`` is set, the
    hypervisor gdb session is tried as a fallback.

    :param symbol: name of the symbol.
    :type symbol: str

    :return: address value, or ``None`` when the lookup fails

    Example:

    >>> hex(dump.address_of('linux_banner'))
    '0xffffffc000c7a0a8L'
    """
    kaslr_tmp = self.get_kaslr_offset()
    per_offset = self.cached_data['addressof'].setdefault(kaslr_tmp, {})
    if symbol in per_offset:
        return per_offset[symbol]

    try:
        addr = self.gdbmi.address_of(symbol)
    except gdbmi.GdbMIException:
        if not self.hyp:
            return None
        try:
            addr = self.gdbmi_hyp.address_of(symbol)
        except gdbmi.GdbMIException:
            return None

    # Only successful lookups are cached; failures are retried next time.
    per_offset[symbol] = addr
    return addr
这个函数的功能就是获取指定的symbol的地址。那如何使用呢?
mod_list = self.address_of('modules')
_text = self.address_of('_text')
per_cpu_offset_addr = self.address_of('__per_cpu_offset')
cache_base_addr = self.ramdump.address_of('l2_dump')
mem_dump_data = self.ramdump.address_of('mem_dump_data')
field_offset
def field_offset(self, the_type, field):
    """Gets the offset of a field from the base of its containing struct.

    This can be useful when reading struct fields, although you should
    consider using :func:`~read_structure_field` if
    you're reading a word-sized value.

    Successful lookups are cached in ``self.cached_data['fieldoffset']``.
    The main gdb session is asked first; when that raises
    ``GdbMIException`` and ``self.hyp`` is set, the hypervisor gdb session
    is tried. ``None`` is returned when the lookup fails.

    Example:

    >>> dump.field_offset('struct device', 'bus')
    168
    """
    per_type = self.cached_data['fieldoffset'].setdefault(the_type, {})
    if field in per_type:
        return per_type[field]

    try:
        offset = self.gdbmi.field_offset(the_type, field)
    except gdbmi.GdbMIException:
        if not self.hyp:
            return None
        try:
            offset = self.gdbmi_hyp.field_offset(the_type, field)
        except gdbmi.GdbMIException:
            return None

    # Only successful lookups are cached; failures are retried next time.
    per_type[field] = offset
    return offset
函数功能:返回结构体the_type的成员field的offset
用法:
next_offset = self.field_offset('struct list_head', 'next')
list_offset = self.field_offset('struct module', 'list')
name_offset = self.field_offset('struct module', 'name')
container_of
def container_of(self, ptr, the_type, member):
    """Like ``container_of`` in the kernel.

    The request is forwarded to the main gdb session; if that raises
    ``GdbMIException`` and ``self.hyp`` is set, the hypervisor gdb session
    is asked instead. Returns ``None`` when no session can answer.
    """
    try:
        return self.gdbmi.container_of(ptr, the_type, member)
    except gdbmi.GdbMIException:
        if not self.hyp:
            return None

    # Main session failed and a hypervisor session is available.
    try:
        return self.gdbmi_hyp.container_of(ptr, the_type, member)
    except gdbmi.GdbMIException:
        return None
函数功能:和内核container_of
类似
read_cstring
def read_cstring(self, addr_or_name, max_length=100, virtual=True,
                 cpu=None, allow_elf=False):
    """Reads a C string.

    :param addr_or_name: address (or symbol name) to read from.
    :param max_length: maximum number of bytes to read.
    :param virtual: when true, ``addr_or_name`` is translated with
        ``virt_to_phys`` before reading; otherwise it is used as given.
    :param cpu: when not ``None`` (and ``virtual`` is true), the per-cpu
        offset of that cpu is added to the resolved address.
    :param allow_elf: when true and the address cannot be translated, try
        reading through gdb first.
    :return: the bytes up to the first NUL, decoded as ASCII with
        undecodable bytes dropped, or ``None`` when nothing could be read.
    """
    addr = addr_or_name
    s = None
    if virtual:
        if cpu is not None:
            addr_or_name = self.resolve_virt(addr_or_name)
            # Apply the per-cpu offset exactly once; it used to be added
            # twice, which pointed the read at the wrong address.
            addr_or_name += self.per_cpu_offset(cpu)
        addr = self.virt_to_phys(addr_or_name)
    if allow_elf and addr is None:
        s = self.gdbmi.read_memory(addr_or_name, '{}+{}'.format(addr_or_name, max_length))
    if not s:
        s = self.read_physical(addr, max_length)
    if s is None:
        return None
    return s.decode('ascii', 'ignore').split('\0')[0]
函数功能:从某地址读取字符串
用法:
self.ramdump.read_cstring(pointer)
mod_name = self.read_cstring(mod + name_offset)
read_structure_field
def read_structure_field(self, addr_or_name, struct_name, field, virtual=True):
    """Read a 1-, 2-, 4- or 8-byte field from a structure.

    The field width is obtained with ``sizeof`` and selects the matching
    fixed-width reader. Returns ``None`` when the base address or the size
    cannot be determined, or when the field has any other width.
    """
    width = self.sizeof("(({0} *)0)->{1}".format(struct_name, field))
    base = self.resolve_virt(addr_or_name)
    if base is None or width is None:
        return None

    field_addr = base + self.field_offset(struct_name, field)

    # Field width in bytes -> name of the reader method for that width.
    reader_by_width = {
        1: 'read_byte',
        2: 'read_u16',
        4: 'read_u32',
        8: 'read_u64',
    }
    reader_name = reader_by_width.get(width)
    if reader_name is None:
        return None
    return getattr(self, reader_name)(field_addr, virtual)
函数功能:从一个结构体里读取一个成员(普通变量)
用法:
ver = int(self.read_structure_field(socinfo, 'struct socinfo', 'ver') or 0)
serial_num = int(self.read_structure_field(socinfo, 'struct socinfo', 'serial_num') or 0)
read_structure_cstring
def read_structure_cstring(self, addr_or_name, struct_name, field,
                           max_length=100):
    """reads a C string from a structure field. The C string field will be
    dereferenced before reading, so it should be a ``char *``, not a
    ``char []``.

    Returns ``None`` (instead of raising ``TypeError``) when the structure
    address, the field offset or the string pointer cannot be resolved,
    matching how ``read_structure_field`` reports unreadable data.
    """
    virt = self.resolve_virt(addr_or_name)
    offset = self.field_offset(struct_name, field)
    if virt is None or offset is None:
        return None
    cstring_addr = self.read_pointer(virt + offset)
    if cstring_addr is None:
        return None
    return self.read_cstring(cstring_addr, max_length)
函数功能:从结构体中读取一个成员(char *类型)
用法:
self.ramdump.read_structure_cstring(zram_addr, 'struct zram', 'comp_algs', self.CRYPTO_MAX_ALG_NAME)
关于接口部分还有很多,具体可以自己查看,这里只介绍这几种常用的
read_elf_memory
def read_elf_memory(self, addr, length, temp_file):
    """Read memory through ``gdbmi.read_elf_memory`` and return it as a C string.

    The raw result is decoded as ASCII (undecodable bytes are dropped) and
    truncated at the first NUL. ``None`` is passed through unchanged.
    """
    raw = self.gdbmi.read_elf_memory(addr, length, temp_file)
    if raw is None:
        return raw
    text = raw.decode('ascii', 'ignore')
    return text.split('\0')[0]
主要用于从 ELF 文件中提取字符串信息。
Ramdump类的初始化
在ramparse.py中我们创建了Ramdump类的实例,所以就会执行Ramdump的初始化代码。下面分开描述初始化流程做了哪些事情。这里忽略Ramdump类中一些成员的初始化赋值,以及一些关于minidump/fulldump解析区分的代码
解析Kconfig
self.config = []
self.config_dict = {}
if not self.get_config():
print_out_str('!!! Could not get saved configuration')
print_out_str(
'!!! This is really bad and probably indicates RAM corruption')
print_out_str('!!! Some features may be disabled!')
# extract kernel's configuration to kconfig.txt
saved_config = self.open_file('kconfig.txt')
for l in self.config:
saved_config.write(l + '\n')
saved_config.close()
通过调用get_config函数填充self.config列表,然后根据列表内容写入到kconfig.txt
def get_config(self):
    """Extract the kernel configuration embedded at ``kernel_config_data``.

    The data is read via ``read_elf_memory`` into a temporary file
    (``elf_temp.txt`` under ``self.outdir``), checked for the ``IKCFG_ST``
    magic, and then decompressed with gzip. Every line is appended to
    ``self.config``; each non-comment, non-empty ``KEY=VALUE`` line is also
    stored in ``self.config_dict``.

    :return: ``True`` on success, ``False`` when the symbols are missing,
        the magic does not match, or the data cannot be decompressed.
    """
    kconfig_addr = self.address_of('kernel_config_data')
    if kconfig_addr is None:
        return False

    if self.get_kernel_version() > (5, 0, 0):
        kconfig_addr_end = self.address_of('kernel_config_data_end')
        if kconfig_addr_end is None:
            return False
        kconfig_size = kconfig_addr_end - kconfig_addr
        # magic is 8 bytes before kconfig_addr and data
        # starts at kconfig_addr for kernel > 5.0.0
        kconfig_addr = kconfig_addr - 8
    else:
        kconfig_size = self.sizeof('kernel_config_data')
        if kconfig_size is None:
            return False
        # size includes magic, offset from it
        kconfig_size = kconfig_size - 16 - 1

    zconfig = os.path.join(self.outdir, "elf_temp.txt")
    try:
        # The second and third arguments are the start and end address of
        # the range to read; first fetch only the 8-byte magic.
        with open(zconfig, 'wb+') as temp_file:
            magic = self.read_elf_memory(kconfig_addr, kconfig_addr + 8, temp_file)
        if magic != 'IKCFG_ST':
            return False

        # The payload follows the magic; reopening truncates the temp file.
        data_addr = kconfig_addr + 8
        with open(zconfig, 'wb+') as temp_file:
            self.read_elf_memory(data_addr, data_addr + kconfig_size, temp_file)

        try:
            with gzip.open(zconfig, 'rt') as zconfig_in:
                lines = zconfig_in.readlines()
        except Exception:
            # Best effort: any read/decompress failure just means the saved
            # configuration is unavailable (the caller reports it).
            return False
    finally:
        # Never leave the temporary file behind, whatever the outcome.
        if os.path.exists(zconfig):
            os.remove(zconfig)

    for line in lines:
        self.config.append(line.rstrip())
        if line.startswith('#') or line.strip() == '':
            continue
        cfg, sep, val = line.partition('=')
        if sep:
            self.config_dict[cfg] = val.strip()
    return True
这个函数执行后会产生kconfig.txt以及self.config_dict(如果需要查询某个config配置项,就很方便了)
也提供了现成的接口可以调用,也就是get_config_val
get_config_val
def get_config_val(self, config):
    """Look up a kernel config option in ``self.config_dict``.

    Returns the stored value, or ``None`` when the option is not present.

    Example:

    >>> int(dump.get_config_val("CONFIG_ARM64_VA_BITS"))
    39
    """
    options = self.config_dict
    value = options.get(config)
    return value
函数功能:获取内核config项的值
用法:
self.pgtable_levels = int(self.get_config_val("CONFIG_PGTABLE_LEVELS"))
解析出kernel symbols table
这依赖于是否传参--dump_krnl_sym_tbl
,默认这个是false,如果需要解析这个,可以加上这个参数
if self.dump_kernel_symbol_table:
self.dump_mod_sym_table('vmlinux', self.lookup_table)
def dump_mod_sym_table(self, mod_name, sym_lookup_tbl):
    """Write a symbol lookup table to ``sym_tbl_<mod_name>.txt``.

    :param mod_name: name used to build the output file name.
    :param sym_lookup_tbl: iterable of entries whose first two items are
        the symbol address (int) and the symbol name.

    One ``0x<addr> <name>`` line is written per entry. The file is closed
    even when writing or formatting an entry fails.
    """
    sym_dump_file = self.open_file('sym_tbl_' + mod_name + '.txt')
    try:
        for entry in sym_lookup_tbl:
            sym_dump_file.write('0x{0:x} {1}\n'.format(entry[0], entry[1]))
    finally:
        sym_dump_file.close()
vmlinux和dump检测是否匹配
if not self.match_version():
print_out_str('!!! Could not get the Linux version!')
print_out_str(
'!!! Your vmlinux is probably wrong for these dumps')
print_out_str('!!! Exiting now')
sys.exit(1)
def match_version(self):
    """Check that the vmlinux and the dump belong to the same kernel build.

    The banner string read from the dump at the address of ``linux_banner``
    is compared with ``self.linux_banner`` (the banner taken from vmlinux,
    set by ``get_kernel_version``).

    :return: ``True`` when the vmlinux banner is contained in the banner
        read from the dump, ``False`` otherwise (also when the symbol is
        missing or the banner cannot be read from the dump).
    """
    banner_addr = self.address_of('linux_banner')
    if banner_addr is None:
        print_out_str('!!! linux_banner sym not found in vmlinux')
        return False

    # The banner is read by physical address, hence virtual=False below.
    banner_phys = self.kernel_virt_to_phys(banner_addr)
    dumped = self.read_cstring(banner_phys, len(self.linux_banner), False)
    if dumped is None:
        print_out_str('!!! Banner not found in dumps!')
        return False

    print_out_str('Linux Banner: ' + dumped.rstrip())
    if str(self.linux_banner) not in str(dumped):
        print_out_str("Expected Linux banner = %s" % self.linux_banner)
        print_out_str("Linux banner in Dumps = %s" % dumped)
        return False

    print_out_str("Linux banner from vmlinux = %s" % self.linux_banner)
    print_out_str("Linux banner from dump = %s" % dumped)
    return True
modules symbols加载
if self.module_table.sym_paths_exist():
self.setup_module_symbols()
self.gdbmi.setup_module_table(self.module_table)
if self.dump_global_symbol_table:
self.dump_global_symbol_lookup_table()
获知modules symbols的路径
modules的symbols的路径是通过传参得到的,也就是-m
参数。
# Save all paths given from --mod_path option. These will be searched for .ko.unstripped files
if options.mod_path_list:
for path in options.mod_path_list:
self.module_table.add_sym_path(path)
def add_sym_path(self, sym_path):
    """Register a directory to be searched for module symbol files.

    :param sym_path: path to add; must not be ``None`` and must exist.
    :return: ``True`` when the path was appended to ``self.sym_path_list``,
        ``False`` (after printing a message) otherwise.
    """
    if sym_path is None:
        print_out_str('sym_path: not specified!')
        return False
    if not os.path.exists(sym_path):
        print_out_str('sym_path: ' + sym_path + ' not valid or directory doesn\'t exist')
        return False
    self.sym_path_list.append(sym_path)
    return True
执行后,self.sym_path_list列表中就保存了所有的ko symbols的路径
设置module symbols
def setup_module_symbols(self):
    """Run the module symbol setup steps in order.

    Steps: scan the symbol paths, collect the loaded modules (using the
    minidump variant when ``self.minidump`` is set), parse each module's
    symbols, and merge them into the global lookup table.
    """
    self.traverse_module()
    retrieve = self.retrieve_minidump_modules if self.minidump else self.retrieve_modules
    retrieve()
    self.parse_module_symbols()
    self.add_symbols_to_global_lookup_table()
遍历模块填充模块相关结构体
def traverse_module(self):
    """Scan every symbol path for module files and index them by module name.

    ``self.walk_depth`` is called for each path in
    ``self.module_table.sym_path_list`` with a callback that handles
    ``*.ko.unstripped`` and ``*.ko`` files. The module name is the file's
    base name without the extension, with ``-`` replaced by ``_``. Matches
    are stored in ``self.ko_file_dict`` (name -> file) and the name is
    appended to ``self.ko_file_names``.
    """
    unstripped_ext = '.ko.unstripped'
    stripped_ext = '.ko'

    def on_file(file):
        if file.endswith(unstripped_ext):
            stem = file[:-len(unstripped_ext)]
        elif file.endswith(stripped_ext):
            stem = file[:-len(stripped_ext)]
        else:
            return
        name = os.path.basename(stem).replace("-", "_")

        # Prefer .ko.unstripped
        already_unstripped = self.ko_file_dict.get(name, '').endswith(unstripped_ext)
        if already_unstripped and file.endswith(stripped_ext):
            return
        # Prefer ko with debug info
        if name in self.ko_file_dict and self.has_debug_info(self.ko_file_dict.get(name)):
            return

        self.ko_file_dict[name] = file
        self.ko_file_names.append(name)

    for path in self.module_table.sym_path_list:
        self.walk_depth(path, on_file)
这个函数目的就是将这些module的symbols填充到self.ko_file_dict字典以及self.ko_file_names列表中
检索模块
这部分逻辑比较复杂,看代码注释吧
def retrieve_modules(self):
    """Walk the kernel's ``modules`` list and record every live module.

    For each ``struct module`` whose state is ``MODULE_STATE_LIVE`` a
    ``module_table_entry`` is filled with the module name, the core base
    address (``module_offset``), the ``kallsyms`` pointer and the addresses
    of a fixed set of sections (plus ``.data..percpu`` when ``CONFIG_SMP``
    is defined and the module's ``percpu_size`` is non-zero). The entry is
    then added to ``self.module_table``.

    Modules in any other state, and modules whose section attribute group
    is missing or not named ``sections``, are reported and skipped.
    """
    # Address of the list head and the offsets needed to walk the list.
    mod_list = self.address_of('modules')
    next_offset = self.field_offset('struct list_head', 'next')
    list_offset = self.field_offset('struct module', 'list')
    name_offset = self.field_offset('struct module', 'name')
    if self.is_config_defined('CONFIG_SMP'):
        percpu_offset = self.field_offset('struct module', 'percpu')
        percpu_size_offset = self.field_offset('struct module', 'percpu_size')

    # The field holding the core base address depends on the kernel version.
    if self.kernel_version >= (6, 4, 0):
        module_core_offset = self.field_offset('struct module', 'mem[0].base')
    elif self.kernel_version > (4, 9, 0):
        module_core_offset = self.field_offset('struct module', 'core_layout.base')
    else:
        module_core_offset = self.field_offset('struct module', 'module_core')

    # The section name lives in battr.attr.name when 'battr' exists,
    # otherwise directly in 'name'.
    if self.field_offset('struct module_sect_attr', 'battr') is not None:
        sect_name_offset = (self.field_offset('struct module_sect_attr', 'battr')
                            + self.field_offset('struct bin_attribute', 'attr')
                            + self.field_offset('struct attribute', 'name'))
    else:
        sect_name_offset = self.field_offset('struct module_sect_attr', 'name')

    kallsyms_offset = self.field_offset('struct module', 'kallsyms')
    sect_addr_offset = self.field_offset('struct module_sect_attr', 'address')
    nsections_offset = self.field_offset('struct module_sect_attrs', 'nsections')
    section_attrs_offset = self.field_offset('struct module_sect_attrs', 'attrs')
    section_attr_size = self.sizeof('struct module_sect_attr')
    mod_sect_attrs_offset = self.field_offset('struct module', 'sect_attrs')
    mod_state_offset = self.field_offset('struct module', 'state')
    mod_attr_grp_name_offset = (self.field_offset('struct module_sect_attrs', 'grp')
                                + self.field_offset('struct attribute_group', 'name'))
    module_states = self.gdbmi.get_enum_lookup_table('module_state', 5)

    # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/gdb/linux/symbols.py?h=v5.14#n102
    wanted_sections = ['.data', '.data..read_mostly', '.rodata', '.bss',
                       '.text', '.text.bss', '.text.hot', '.text.unlikely']

    next_list_ent = self.read_pointer(mod_list + next_offset)
    while next_list_ent and next_list_ent != mod_list:
        # list_head is embedded in struct module: step back to the struct.
        module = next_list_ent - list_offset
        # Advance first so every `continue` below moves to the next module.
        next_list_ent = self.read_pointer(next_list_ent + next_offset)

        mod_tbl_ent = module_table.module_table_entry()
        mod_tbl_ent.name = self.read_cstring(module + name_offset)
        state = self.read_u32(module + mod_state_offset)
        # `>=` (not `>`): state == len(module_states) is already out of
        # range and would raise IndexError on the lookup below.
        if (mod_tbl_ent.name is None or state is None
                or state >= len(module_states)
                or module_states[state] != 'MODULE_STATE_LIVE'):
            msg = 'module state @{:x}'.format(module)
            if mod_tbl_ent.name:
                msg += ' [{}]'.format(mod_tbl_ent.name)
            msg += ' is {}'.format(state)
            if state is not None and state < len(module_states):
                msg += '({})'.format(module_states[state])
            print_out_str(msg)
            continue

        mod_tbl_ent.module_offset = self.read_pointer(module + module_core_offset)
        if mod_tbl_ent.module_offset is None:
            mod_tbl_ent.module_offset = 0
        mod_tbl_ent.kallsyms_addr = self.read_pointer(module + kallsyms_offset)

        # Loop through sect_attrs
        mod_tbl_ent.section_offsets = {}
        mod_sect_attrs = self.read_pointer(module + mod_sect_attrs_offset)  # module.sect_attrs
        grp_name = None
        if mod_sect_attrs is not None:
            grp_name = self.read_cstring(
                self.read_pointer(mod_sect_attrs + mod_attr_grp_name_offset))
        if grp_name != 'sections':
            # Observed some ramdumps did not have proper attribute set up yet when module is being loaded.
            # "LIVE" state check not good enough, so add one more sanity check
            print_out_str('Unexpected variation in module section group name, skipping loading sections for {}'.format(mod_tbl_ent.name))
            continue

        # An unreadable section count is treated as "no sections".
        nsections = self.read_u32(mod_sect_attrs + nsections_offset) or 0
        for i in range(nsections):
            # attr_ptr = module.sect_attrs.attrs[i]
            attr_ptr = mod_sect_attrs + section_attrs_offset + (i * section_attr_size)
            # sect_name = attr_ptr.battr.attr.name (for 5.4+)
            sect_name = self.read_cstring(self.read_pointer(attr_ptr + sect_name_offset))
            # sect_addr = attr_ptr.address
            sect_addr = self.read_word(attr_ptr + sect_addr_offset)
            if sect_name not in wanted_sections:
                continue
            mod_tbl_ent.section_offsets[sect_name] = sect_addr

        if self.is_config_defined('CONFIG_SMP'):
            percpu_size = self.read_u32(module + percpu_size_offset)
            if percpu_size != 0:
                percpu_pointer = self.read_pointer(module + percpu_offset)
                mod_tbl_ent.section_offsets['.data..percpu'] = percpu_pointer

        self.module_table.add_entry(mod_tbl_ent)
主要任务:
遍历模块链表:
从
modules
链表头开始,遍历系统中所有加载的内核模块。
筛选有效模块:
仅处理状态为
MODULE_STATE_LIVE
的模块,跳过还未加载完成或已卸载的模块。
提取模块信息:
模块名称(
name
)模块加载到内存中的地址(
module_core
/core_layout.base
等)模块符号地址(
kallsyms
)
提取模块的节区(section)信息:
如
.text
,.data
,.rodata
,.bss
等常见节区的地址如果启用了 SMP(多核),还会提取
.data..percpu
的地址
保存模块信息到全局模块表中
self.module_table 里现在就会存储所有模块的信息
模块解析
def parse_module_symbols(self):
    """Parse the symbols of every entry in ``self.module_table.module_table``.

    Entries without a name are reported and skipped; all others are handed
    to ``parse_symbols_of_one_module`` together with ``self.ko_file_dict``.
    """
    for entry in self.module_table.module_table:
        if entry.name is not None:
            self.parse_symbols_of_one_module(entry, self.ko_file_dict)
        else:
            print_out_str('!! Object name not extracted properly..checking next!!')
def parse_symbols_of_one_module(self, mod_tbl_ent, ko_file_dict):
    """Collect the symbols of one module into its module table entry.

    :param mod_tbl_ent: module table entry to fill; its ``name``,
        ``kallsyms_addr`` and ``module_offset`` are read here.
    :param ko_file_dict: mapping of module name -> symbol file. When there
        is no exact key for the module, the first key containing the module
        name is re-keyed under the exact name (the dict is modified).

    With ``CONFIG_KALLSYMS`` defined and a full dump, the module's
    ``struct mod_kallsyms`` is walked in the dump and FUNC symbols are
    stored in ``mod_tbl_ent.kallsyms_table``. Otherwise ``nm -n`` is run on
    the symbol file and its output, shifted by ``module_offset``, is stored
    in ``mod_tbl_ent.sym_lookup_table``. Both tables are sorted, and dumped
    to a file when the corresponding option is set.
    """
    name_index = [s for s in ko_file_dict if mod_tbl_ent.name in s]
    if not name_index:
        print_out_str('!! Object not found for {}'.format(mod_tbl_ent.name))
        return
    if mod_tbl_ent.name not in ko_file_dict:
        # Re-key the first partial match under the exact module name.
        ko_file_dict[mod_tbl_ent.name] = ko_file_dict.pop(name_index[0])
    if not mod_tbl_ent.set_sym_path(ko_file_dict[mod_tbl_ent.name]):
        return

    if self.is_config_defined("CONFIG_KALLSYMS") and not self.minidump:
        kallsyms_addr = mod_tbl_ent.kallsyms_addr
        if kallsyms_addr is None:
            # Nothing to walk; previously this raised TypeError.
            return
        symtab_offset = self.field_offset('struct mod_kallsyms', 'symtab')
        num_symtab_offset = self.field_offset('struct mod_kallsyms', 'num_symtab')
        strtab_offset = self.field_offset('struct mod_kallsyms', 'strtab')
        sym_struct_name = 'struct elf64_sym' if self.arm64 else 'struct elf32_sym'
        sym_struct_size = self.sizeof(sym_struct_name)
        st_info_offset = self.field_offset(sym_struct_name, 'st_info')

        symtab = self.read_pointer(kallsyms_addr + symtab_offset)
        # num_symtab is an `unsigned int` in struct mod_kallsyms; reading it
        # as a pointer would pull in the bytes that follow it on 64-bit.
        num_symtab = self.read_u32(kallsyms_addr + num_symtab_offset)
        strtab = self.read_pointer(kallsyms_addr + strtab_offset)
        if symtab is None or num_symtab is None or strtab is None:
            return

        KSYM_NAME_LEN = 128
        for i in range(num_symtab):
            elf_sym = symtab + sym_struct_size * i
            st_info = self.read_byte(elf_sym + st_info_offset)
            # see include/uapi/linux/elf.h
            #   #define STT_FUNC 2
            #   #define ELF_ST_TYPE(x) ((x) & 0xf)
            # only add FUNC type symbols to avoid built-in symbols
            # being treated as belonging to a particular kernel module
            if st_info is None or st_info & 0xf != 2:
                continue
            st_name = self.read_structure_field(elf_sym, sym_struct_name, 'st_name')
            if st_name is None:
                continue
            sym_name = self.read_cstring(strtab + st_name, KSYM_NAME_LEN)
            if sym_name is None or mod_tbl_ent.name is None:
                continue
            sym_addr = self.read_structure_field(elf_sym, sym_struct_name, 'st_value')
            st_shndx = self.read_structure_field(elf_sym, sym_struct_name, 'st_shndx')
            st_size = self.read_structure_field(elf_sym, sym_struct_name, 'st_size')
            # FORMAT of record:
            # sym_addr, sym_name[mod_name], sym_type, idx_elf_sym,
            # st_name, st_shndx, st_size, sym_name
            mod_tbl_ent.kallsyms_table.append(
                (sym_addr, sym_name + '[' + mod_tbl_ent.name + ']', chr(st_info), i,
                 st_name, st_shndx, st_size, sym_name))
        mod_tbl_ent.kallsyms_table.sort()
        if self.dump_module_kallsyms:
            self.dump_mod_kallsyms_sym_table(mod_tbl_ent.name, mod_tbl_ent.kallsyms_table)
    else:
        args = [self.nm_path, '-n', mod_tbl_ent.get_sym_path()]
        p = subprocess.run(args, stdout=subprocess.PIPE)
        for line in p.stdout.decode().splitlines():
            s = line.split(' ')
            # Only "<addr> <type> <name>" lines carry an address.
            if len(s) == 3:
                mod_tbl_ent.sym_lookup_table.append(
                    (int(s[0], 16) + mod_tbl_ent.module_offset,
                     s[2].rstrip() + '[' + mod_tbl_ent.name + ']'))
        mod_tbl_ent.sym_lookup_table.sort()
        if self.dump_module_symbol_table:
            self.dump_mod_sym_table(mod_tbl_ent.name, mod_tbl_ent.sym_lookup_table)
mod_tbl_ent
: 表示一个模块的信息对象,之前已经通过retrieve_modules()
构建好了,里面有模块名、内存地址、kallsyms 地址等。ko_file_dict
: 一个.ko
文件名到路径或内容的映射字典(模块名或文件路径 -> .ko 文件路径),用于辅助提取符号。
parse_symbols_of_one_module()
会根据提取方式,把模块的所有函数级符号保存到 mod_tbl_ent
的 kallsyms_table
或 sym_lookup_table
字段中
输出模块 symbols kallsyms
这依赖于是否传参--dump_mod_kallsyms
,默认这个是false,如果需要解析这个,可以加上这个参数
def dump_mod_kallsyms_sym_table(self, mod_name, mod_kallsyms_table):
    """Write a module's kallsyms table to ``sym_tbl_kallsyms_<mod_name>.txt``.

    :param mod_name: module name, used in the file name and the title line.
    :param mod_kallsyms_table: iterable of records laid out as
        ``(sym_addr, sym_name[mod_name], sym_type, idx_elf_sym, st_name,
        st_shndx, st_size, ...)``.

    The file gets a title line, a column header and one formatted line per
    record. It is closed even when formatting a record fails.
    """
    kallsyms_header_format = '{0: >18} {1} {2: >64} {3} {4} {5} {6}\n'
    kallsyms_record_format = '0x{0:0>16x} {1: >8} {2: >64} {3: >11} {4: >7} {5: >8} {6: >7}\n'
    kallsyms_file = self.open_file('sym_tbl_kallsyms_' + mod_name + '.txt')
    try:
        kallsyms_file.write('KALLSYMS symbol lookup table[' + mod_name + ']\n')
        kallsyms_file.write(
            kallsyms_header_format.format(
                'sym_addr', 'sym_type', 'syn_name[mod_name]', 'idx_elf_sym',
                'st_name', 'st_shndx', 'st_size'))
        for mod_sym_line in mod_kallsyms_table:
            # Columns are written as address, type, name: indices 0, 2, 1.
            kallsyms_file.write(
                kallsyms_record_format.format(
                    mod_sym_line[0], mod_sym_line[2], mod_sym_line[1], mod_sym_line[3],
                    hex(mod_sym_line[4]), mod_sym_line[5], mod_sym_line[6]))
    finally:
        kallsyms_file.close()
加载symbols
if self.module_table.sym_paths_exist():
# ...
self.gdbmi.setup_module_table(self.module_table) # 调用gdb加载symbols
def setup_module_table(self, module_table):
    """Issue one ``add-symbol-file`` command per module that has a symbol file.

    The module table is stored in ``self.mod_table``. Every address passed
    to the command has ``self.kaslr_offset`` subtracted. When a module has
    no ``.text`` entry in ``section_offsets``, its ``module_offset`` is
    given as the load address; every known section is passed with ``-s``.
    """
    self.mod_table = module_table
    for mod in self.mod_table.module_table:
        sym_path = mod.get_sym_path()
        if not sym_path:
            continue

        # Backslashes in the path are doubled before building the command.
        command = ['add-symbol-file', sym_path.replace('\\', '\\\\')]
        if ".text" not in mod.section_offsets:
            command.append('0x{:x}'.format(mod.module_offset - self.kaslr_offset))
        for segment, offset in mod.section_offsets.items():
            command.extend(('-s', segment, '0x{:x}'.format(offset - self.kaslr_offset)))
        self._run(' '.join(command))
输出global_symbol_table
这依赖于是否传参--dump_glb_sym_tbl
,默认这个是false,如果需要解析这个,可以加上这个参数
if self.dump_global_symbol_table:
self.dump_global_symbol_lookup_table()
def dump_global_symbol_lookup_table(self):
    """Write ``self.lookup_table`` to ``sym_table.txt``.

    One ``0x<addr> <name>`` line is written per entry, taking the address
    and name from the entry's first two items. The file is closed even
    when writing or formatting an entry fails.
    """
    sym_dump_file = self.open_file('sym_table.txt')
    try:
        for entry in self.lookup_table:
            sym_dump_file.write('0x{0:x} {1}\n'.format(entry[0], entry[1]))
    finally:
        sym_dump_file.close()
至此,在ramparse.py中的Ramdump基类的初始化动作完成
dump = RamDump(options, nm_path, gdb_path, objdump_path,gdb_ndk_path)
下面开始将会分析每一个插件的实现逻辑,从插件解析dump的逻辑角度去查看内核的一些知识点。