0. 问题现象
目前协调FAE 从印度前线客户服务中心收集3份日志,初步分析 Slab占用过大3.9G,有Slab内存泄漏的问题,目前需要异常机复现现场问题后确认泄漏的原因。
Slab 内存占用高的问题,已同平台窗口和性能模块核对(case:ALPS09052161)。目前来看,屏幕卡住不动跟 WMS 关系不大;binder 方面看起来不算是完全耗尽卡死,但是可以看出执行都比较缓慢。low memory 和 95% 的 CPU 占用太高了,是比较可能导致问题的异常点,在这种 performance 情况下 UI 是会出现无法及时响应的。Performance team 建议:对于 CPU loading 重的高负载场景,优化方向只能是尽量降低出现的概率,无法完全避免。
1. 问题分析
1.1 打开slabtrace
diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig
index 29e7bba5..5fa4bdd 100644
--- a/arch/arm64/configs/gki_defconfig
+++ b/arch/arm64/configs/gki_defconfig
@@ -46,6 +46,7 @@
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLUB_DEBUG=y
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
@@ -65,7 +66,7 @@
CONFIG_ARM64_MPAM=y
CONFIG_RANDOMIZE_BASE=y
# CONFIG_RANDOMIZE_MODULE_REGION_FULL is not set
-CONFIG_CMDLINE="stack_depot_disable=on kasan.stacktrace=off kvm-arm.mode=protected cgroup_disable=pressure"
+CONFIG_CMDLINE="stack_depot_disable=on kasan.stacktrace=off kvm-arm.mode=protected cgroup_disable=pressure cgroup.memory=nokmem slub_debug=OFZPU"
CONFIG_CMDLINE_EXTEND=y
# CONFIG_DMI is not set
CONFIG_HIBERNATION=y
diff --git a/drivers/misc/mediatek/Makefile b/drivers/misc/mediatek/Makefile
index 03747f8..3acb64f 100644
--- a/drivers/misc/mediatek/Makefile
+++ b/drivers/misc/mediatek/Makefile
@@ -98,6 +98,7 @@
obj-$(CONFIG_USB) += usb/
obj-$(CONFIG_MTK_WIDEVINE_DRM) += widevine_drm/
obj-$(CONFIG_MTK_MT6382_BDG) += spi_slave_drv/
+obj-y += mem/
# BSP.System - 2023.7.1 - add simtray status
obj-$(CONFIG_SIMTRAY_STATUS) += simtray/
#add cpumaxfreq
diff --git a/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h b/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h
new file mode 100644
index 0000000..69a58a7
--- /dev/null
+++ b/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 MediaTek Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __MTK_MEMCFG_H__
+#define __MTK_MEMCFG_H__
+#include <linux/fs.h>
+
+/* late warning flags */
+#define WARN_MEMBLOCK_CONFLICT (1 << 0) /* memblock overlap */
+#define WARN_MEMSIZE_CONFLICT (1 << 1) /* dram info missing */
+#define WARN_API_NOT_INIT (1 << 2) /* API is not initialized */
+
+#define MTK_MEMCFG_MEMBLOCK_PHY 0x1
+#define MTK_MEMCFG_MEMBLOCK_DEBUG 0x2
+
+#define MTK_MEMCFG_LOG_AND_PRINTK(fmt, arg...) pr_info(fmt, ##arg)
+
+/* proc open handler used for the "slabtrace" entry; defined in mm/slub.c */
+extern int slabtrace_open(struct inode *inode, struct file *file);
+
+#define mtk_memcfg_record_freed_reserved(start, end) do {} while (0)
+#define mtk_memcfg_inform_vmpressure() do { } while (0)
+#endif /* end __MTK_MEMCFG_H__ */
diff --git a/drivers/misc/mediatek/mem/Makefile b/drivers/misc/mediatek/mem/Makefile
new file mode 100644
index 0000000..7895b44
--- /dev/null
+++ b/drivers/misc/mediatek/mem/Makefile
@@ -0,0 +1,14 @@
+#
+# Copyright (C) 2015 MediaTek Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+ccflags-y += -I$(srctree)/drivers/misc/mediatek/include
+obj-y += mtk_memcfg.o
diff --git a/drivers/misc/mediatek/mem/mtk_memcfg.c b/drivers/misc/mediatek/mem/mtk_memcfg.c
new file mode 100644
index 0000000..f477203
--- /dev/null
+++ b/drivers/misc/mediatek/mem/mtk_memcfg.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2015 MediaTek Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
+#include <linux/memblock.h>
+#include <linux/oom.h>
+#include <linux/swap.h>
+#include <linux/sort.h>
+
+#include <asm/setup.h>
+
+#include <mt-plat/mtk_memcfg.h>
+
+#ifdef CONFIG_MTK_AEE_FEATURE
+#include <mt-plat/aee.h>
+#endif
+
+#ifdef CONFIG_SLUB_DEBUG
+/*
+ * kernel slabtrace: file operations of the "slabtrace" proc entry that
+ * mtk_memcfg_late_init() creates. Opening goes through slabtrace_open();
+ * read, seek and release use the generic seq_file/single_open helpers.
+ */
+static const struct proc_ops proc_slabtrace_operations = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = slabtrace_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+};
+
+/* end of kernel slabtrace */
+#endif
+/*
+ * mtk_memcfg_late_init - create /proc/mtk_memcfg and its debug entries.
+ *
+ * Creates the "mtk_memcfg" directory in the proc root and, when
+ * CONFIG_SLUB_DEBUG is enabled, a read-only (0400) "slabtrace" file in it.
+ * Failures are only logged; the initcall always returns 0.
+ */
+static int __init mtk_memcfg_late_init(void)
+{
+	struct proc_dir_entry *mtk_memcfg_dir;
+
+	mtk_memcfg_dir = proc_mkdir("mtk_memcfg", NULL);
+	if (!mtk_memcfg_dir) {
+		pr_info("[%s]: mkdir /proc/mtk_memcfg failed\n", __func__);
+		return 0;
+	}
+
+#ifdef CONFIG_SLUB_DEBUG
+	/*
+	 * slabtrace - full slub object backtrace.
+	 * The returned entry is only needed for the NULL check, so no local
+	 * is kept; this also avoids an unused variable when
+	 * CONFIG_SLUB_DEBUG is disabled.
+	 */
+	if (!proc_create("slabtrace", 0400, mtk_memcfg_dir,
+			 &proc_slabtrace_operations))
+		pr_info("create slabtrace proc entry failed\n");
+#endif
+
+	return 0;
+}
+
+late_initcall(mtk_memcfg_late_init);
diff --git a/mm/slab.h b/mm/slab.h
index 61d235e..2f48ad0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -96,6 +96,16 @@
* Tracking user of a slab.
*/
#define TRACK_ADDRS_COUNT 16
+
+// #ifdef CONFIG_ARM64
+// #ifndef CONFIG_RANDOMIZE_BASE
+#define MTK_COMPACT_SLUB_TRACK
+#define MTK_MEMCFG_SLABTRACE_CNT 5
+#undef TRACK_ADDRS_COUNT
+#define TRACK_ADDRS_COUNT MTK_MEMCFG_SLABTRACE_CNT
+// #endif
+// #endif
+
struct track {
unsigned long addr; /* Called from address */
#ifdef CONFIG_STACKTRACE
diff --git a/mm/slub.c b/mm/slub.c
index a160f2d2..573a695 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -726,6 +726,28 @@
if (addr) {
#ifdef CONFIG_STACKTRACE
+#ifdef MTK_COMPACT_SLUB_TRACK
+ unsigned int nr_entries;
+ unsigned long addrs[TRACK_ADDRS_COUNT];
+ int i;
+
+ memset(addrs, 0, sizeof(addrs));
+
+ metadata_access_enable();
+ nr_entries = stack_trace_save(kasan_reset_tag(addrs),
+ TRACK_ADDRS_COUNT, 3);
+ metadata_access_disable();
+
+ for (i = nr_entries; i < TRACK_ADDRS_COUNT; i++)
+ addrs[i] = 0;
+
+ for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+ if (addrs[i])
+ p->addrs[i] = addrs[i] - MODULES_VADDR;
+ else
+ p->addrs[i] = 0;
+ }
+#else
unsigned int nr_entries;
metadata_access_enable();
@@ -736,6 +758,7 @@
if (nr_entries < TRACK_ADDRS_COUNT)
p->addrs[nr_entries] = 0;
#endif
+#endif
p->addr = addr;
p->cpu = smp_processor_id();
p->pid = current->pid;
@@ -763,6 +786,28 @@
pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
+#ifdef MTK_COMPACT_SLUB_TRACK
+ {
+ int i;
+ unsigned long addrs[TRACK_ADDRS_COUNT];
+
+ /* we store the offset after MODULES_VADDR for
+ * kernel module and kernel text address
+ */
+ for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+ if (t->addrs[i])
+ addrs[i] = MODULES_VADDR + t->addrs[i];
+ else
+ addrs[i] = 0;
+ }
+ for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+ if (addrs[i])
+ pr_err("\t%pS\n", (void *)addrs[i]);
+ else
+ break;
+ }
+ }
+#else
{
int i;
for (i = 0; i < TRACK_ADDRS_COUNT; i++)
@@ -772,6 +817,7 @@
break;
}
#endif
+#endif
}
void print_tracking(struct kmem_cache *s, void *object)
@@ -1474,7 +1520,8 @@
* Avoid enabling debugging on caches if its minimum
* order would increase as a result.
*/
- higher_order_disable = true;
+ // print kmalloc-4096 and kmalloc-8192 alloc-backtrace
+ disable_higher_order_debug = 0;
break;
default:
if (init)
@@ -5092,6 +5139,9 @@
struct location {
unsigned long count;
unsigned long addr;
+#ifdef CONFIG_STACKTRACE
+ unsigned long addrs[MTK_MEMCFG_SLABTRACE_CNT]; /* caller address */
+#endif
long long sum_time;
long min_time;
long max_time;
@@ -6286,3 +6336,257 @@
return -EIO;
}
#endif /* CONFIG_SLUB_DEBUG */
+
+
+/*
+ * mtk_memcfg_add_location - account one object's track record in @t.
+ * @t: location table being built
+ * @s: cache the object belongs to (not referenced in this function)
+ * @track: track record of the object
+ *
+ * Builds a key of up to MTK_MEMCFG_SLABTRACE_CNT backtrace addresses from
+ * @track, starting at the entry that equals track->addr or track->addr - 4
+ * when such an entry exists, and from index 0 otherwise. t->loc is then
+ * binary-searched for an entry with the same key. On a hit the entry's
+ * count is incremented and its statistics are updated; otherwise a new
+ * entry is inserted at the search position, growing the table through
+ * alloc_loc_track() when it is full.
+ *
+ * Returns 1 on success, 0 if the table could not be grown.
+ */
+static int mtk_memcfg_add_location(struct loc_track *t, struct kmem_cache *s,
+ const struct track *track)
+{
+ long start, end, pos;
+ struct location *l;
+ /* Key (address array) of the table entry currently being compared */
+ unsigned long (*caddrs)[MTK_MEMCFG_SLABTRACE_CNT];
+ /* Key built from the track record; unused tail entries stay 0 */
+ unsigned long taddrs[MTK_MEMCFG_SLABTRACE_CNT]
+ = { [0 ... MTK_MEMCFG_SLABTRACE_CNT - 1] = 0,};
+ unsigned long age = jiffies - track->when;
+ int i, cnt;
+
+ start = -1;
+ end = t->count;
+ /* find the index of track->addr */
+ for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+#ifdef MTK_COMPACT_SLUB_TRACK
+ /* we store the offset after MODULES_VADDR for
+ * kernel module and kernel text address
+ */
+ unsigned long addr = (MODULES_VADDR + track->addrs[i]);
+
+ if (track->addr == addr ||
+ ((track->addr - 4) == addr))
+#else
+ if ((track->addr == track->addrs[i]) ||
+ (track->addr - 4 == track->addrs[i]))
+#endif
+ break;
+ }
+ /* copy all addrs if we cannot match track->addr */
+ if (i == TRACK_ADDRS_COUNT)
+ i = 0;
+ /* entries available from index i onwards, capped at the key size */
+ cnt = min(MTK_MEMCFG_SLABTRACE_CNT, TRACK_ADDRS_COUNT - i);
+#ifdef MTK_COMPACT_SLUB_TRACK
+ {
+ int j = 0;
+ unsigned long addrs[TRACK_ADDRS_COUNT];
+
+ for (j = 0; j < TRACK_ADDRS_COUNT; j++) {
+ /* we store the offset after MODULES_VADDR for
+ * kernel module and kernel text address
+ */
+ if (track->addrs[j])
+ addrs[j] = MODULES_VADDR + track->addrs[j];
+ else
+ addrs[j] = 0;
+ }
+ memcpy(taddrs, addrs + i, (cnt * sizeof(unsigned long)));
+ }
+#else
+ memcpy(taddrs, track->addrs + i, (cnt * sizeof(unsigned long)));
+#endif
+
+ /* binary search over t->loc[0 .. t->count - 1], ordered by memcmp() of the keys */
+ for ( ; ; ) {
+ pos = start + (end - start + 1) / 2;
+
+ /*
+ * There is nothing at "end". If we end up there
+ * we need to add something to before end.
+ */
+ if (pos == end)
+ break;
+
+ caddrs = &(t->loc[pos].addrs);
+ if (!memcmp(caddrs, taddrs,
+ MTK_MEMCFG_SLABTRACE_CNT * sizeof(unsigned long))) {
+
+ /* same key: fold this object into the existing entry */
+ l = &t->loc[pos];
+ l->count++;
+ /* age/pid/cpu statistics are only updated when track->when is non-zero */
+ if (track->when) {
+ l->sum_time += age;
+ if (age < l->min_time)
+ l->min_time = age;
+ if (age > l->max_time)
+ l->max_time = age;
+
+ if (track->pid < l->min_pid)
+ l->min_pid = track->pid;
+ if (track->pid > l->max_pid)
+ l->max_pid = track->pid;
+
+ cpumask_set_cpu(track->cpu,
+ to_cpumask(l->cpus));
+ }
+ node_set(page_to_nid(virt_to_page(track)), l->nodes);
+ return 1;
+ }
+
+ /* stored key compares lower than the wanted key: continue below pos, else above */
+ if (memcmp(caddrs, taddrs,
+ MTK_MEMCFG_SLABTRACE_CNT * sizeof(unsigned long)) < 0)
+ end = pos;
+ else
+ start = pos;
+ }
+
+ /*
+ * Not found. Insert new tracking element.
+ */
+ if (t->count >= t->max &&
+ !alloc_loc_track(t, 2 * t->max, __GFP_HIGH | __GFP_ATOMIC))
+ return 0;
+
+ l = t->loc + pos;
+ /* shift the tail up by one slot to make room at pos */
+ if (pos < t->count)
+ memmove(l + 1, l,
+ (t->count - pos) * sizeof(struct location));
+ t->count++;
+ l->count = 1;
+ l->addr = track->addr;
+ memcpy(l->addrs, taddrs,
+ MTK_MEMCFG_SLABTRACE_CNT * sizeof(unsigned long));
+ l->sum_time = age;
+ l->min_time = age;
+ l->max_time = age;
+ l->min_pid = track->pid;
+ l->max_pid = track->pid;
+ cpumask_clear(to_cpumask(l->cpus));
+ cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
+ nodes_clear(l->nodes);
+ node_set(page_to_nid(virt_to_page(track)), l->nodes);
+ return 1;
+}
+
+/*
+ * Walk every object slot of @page and hand the @alloc track record of each
+ * slot whose bit is clear in @map (as filled in by __fill_map()) to
+ * mtk_memcfg_add_location(). @map is scratch space and is overwritten.
+ */
+static void mtk_memcfg_process_slab(struct loc_track *t, struct kmem_cache *s,
+				    struct page *page, enum track_item alloc,
+				    unsigned long *map)
+{
+	void *base = page_address(page);
+	void *obj;
+
+	bitmap_zero(map, page->objects);
+	__fill_map(map, s, page);
+
+	for_each_object(obj, s, base, page->objects) {
+		/* slots marked in the map are skipped */
+		if (test_bit(__obj_to_index(s, base, obj), map))
+			continue;
+
+		mtk_memcfg_add_location(t, s, get_track(s, obj, alloc));
+	}
+}
+
+/*
+ * mtk_memcfg_list_locations - print the tracked objects of one cache,
+ * grouped by backtrace.
+ * @s: cache to report
+ * @m: seq_file receiving the text
+ * @alloc: which track record of each object to group by
+ * @slabsize: accumulator; increased by count * s->size for every location
+ *            when @s does not have SLAB_RECLAIM_ACCOUNT set
+ * @objsize: accumulator; increased by count * s->object_size under the
+ *           same condition
+ *
+ * Flushes the cpu slabs, then walks the partial and full lists of every
+ * node under the node's list_lock and collects the objects selected by
+ * mtk_memcfg_process_slab() into a location table. One line is printed per
+ * table entry: total object size, count, reclaimable class, the caller
+ * (l->addr) and backtrace entries 1..MTK_MEMCFG_SLABTRACE_CNT-1.
+ *
+ * Always returns 0; an allocation failure is reported in @m as
+ * "Out of memory" and an empty table as "No data".
+ */
+static int mtk_memcfg_list_locations(struct kmem_cache *s, struct seq_file *m,
+		enum track_item alloc, unsigned long *slabsize, unsigned long *objsize)
+{
+	unsigned long i, j;
+	struct loc_track t = { 0, 0, NULL };
+	int node;
+	unsigned long *map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
+	struct kmem_cache_node *n;
+
+	if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+				     GFP_KERNEL)) {
+		/* memory from bitmap_alloc() is released with bitmap_free() */
+		bitmap_free(map);
+		seq_puts(m, "Out of memory\n");
+		return 0;
+	}
+	/* Push back cpu slabs */
+	flush_all(s);
+
+	for_each_kmem_cache_node(s, node, n) {
+		unsigned long flags;
+		struct page *page;
+
+		if (!atomic_long_read(&n->nr_slabs))
+			continue;
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, lru)
+			mtk_memcfg_process_slab(&t, s, page, alloc, map);
+		list_for_each_entry(page, &n->full, lru)
+			mtk_memcfg_process_slab(&t, s, page, alloc, map);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	for (i = 0; i < t.count; i++) {
+		struct location *l = &t.loc[i];
+
+		/* l->count is unsigned long, so both values need %lu */
+		seq_printf(m, "total_objsize:%lu count:%lu ",
+			   l->count * s->object_size, l->count);
+
+		if (s->flags & SLAB_RECLAIM_ACCOUNT) {
+			seq_puts(m, "SLAB_RECLAIMABLE ");
+		} else {
+			seq_puts(m, "SLAB_UNRECLAIMABLE ");
+			*objsize += l->count * s->object_size;
+			*slabsize += l->count * s->size;
+		}
+
+		if (l->addr)
+			seq_printf(m, "%pS", (void *)l->addr);
+		else
+			seq_puts(m, "<not-available>");
+
+		/* entry 0 is not printed here; l->addr was printed above */
+		for (j = 1; j < MTK_MEMCFG_SLABTRACE_CNT; j++)
+			if (l->addrs[j])
+				seq_printf(m, " %pS", (void *)l->addrs[j]);
+
+		seq_puts(m, "\n");
+	}
+
+	if (!t.count)
+		seq_puts(m, "No data\n");
+
+	free_loc_track(&t);
+	bitmap_free(map);
+	return 0;
+}
+
+/*
+ * mtk_memcfg_slabtrace_show - seq_file show callback of the slabtrace file.
+ * @m: seq_file to print into
+ * @p: unused
+ *
+ * Walks slab_caches under slab_mutex. A header line is printed for every
+ * cache. Caches with SLAB_STORE_USER set additionally get their
+ * TRACK_ALLOC locations listed by mtk_memcfg_list_locations(), followed by
+ * a per-cache summary line. The sizes accumulated by that helper are
+ * summed and printed as SLAB_UNRECLAIMABLE totals at the end.
+ *
+ * All size counters are unsigned long and are printed with %lu, so totals
+ * above INT_MAX are reported correctly.
+ *
+ * Always returns 0.
+ */
+static int mtk_memcfg_slabtrace_show(struct seq_file *m, void *p)
+{
+	struct kmem_cache *s;
+	unsigned long total_objsize = 0;
+	unsigned long total_size = 0;
+
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		unsigned long objsize = 0;
+		unsigned long size = 0;
+
+		/*
+		 * Every cache is reported. A filter restricting the output to
+		 * kmalloc-* caches existed here but is disabled.
+		 */
+		seq_printf(m, "======= kmem_cache: %s alloc_calls, objsize=%u =======\n",
+			   s->name, s->object_size);
+
+		/* without SLAB_STORE_USER only the header line is emitted */
+		if (!(s->flags & SLAB_STORE_USER))
+			continue;
+
+		mtk_memcfg_list_locations(s, m, TRACK_ALLOC, &size, &objsize);
+
+		seq_printf(m, "======= kmem_cache: %s, total_size=%lu total_objsize=%lu=======\n",
+			   s->name, size, objsize);
+		total_size += size;
+		total_objsize += objsize;
+	}
+	mutex_unlock(&slab_mutex);
+
+	seq_puts(m, " =======\n");
+	seq_printf(m, "SLAB_UNRECLAIMABLE : total_size =%lu =======\n", total_size);
+	seq_printf(m, "SLAB_UNRECLAIMABLE : total_objsize =%lu =======\n", total_objsize);
+	return 0;
+}
+
+/*
+ * Open handler for the slabtrace proc file: binds @file to
+ * mtk_memcfg_slabtrace_show() through single_open(), passing no private
+ * data. Returns the result of single_open().
+ */
+int slabtrace_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_memcfg_slabtrace_show, NULL);
+}
1.2 抓取meminfo和slabtrace等日志
脚本如下:
@echo off
rem Periodically capture memory statistics from the attached device over adb.
rem Every pass writes one set of files into .\meminfo, named after the current
rem date and time, then pauses before the next pass. Stop with Ctrl+C.
set "root_DIR=%CD%\meminfo"
if not exist "%root_DIR%" (
    mkdir "%root_DIR%"
) else (
    echo 目录已存在:%root_DIR%
)
:loop
rem Date part: strip the separators used by common locales so that captures
rem taken on different days do not overwrite each other.
set "datestamp=%date:/=%"
set "datestamp=%datestamp:-=%"
set "datestamp=%datestamp:.=%"
set "datestamp=%datestamp: =_%"
rem Time part: drop ":" and ".", and turn the leading space that %time%
rem uses for hours below 10 into a zero.
set "timestamp=%time::=%"
set "timestamp=%timestamp:.=%"
set "timestamp=%timestamp: =0%"
set "timestamp=%datestamp%_%timestamp%"
set "filename1=%root_DIR%\meminfo_%timestamp%.txt"
set "filename2=%root_DIR%\slabinfo_%timestamp%.txt"
set "filename3=%root_DIR%\slabtrace_%timestamp%.txt"
set "filename4=%root_DIR%\dumpsys_meminfo_%timestamp%.txt"
set "filename5=%root_DIR%\kmalloc_128_trace_%timestamp%.txt"
echo Log starting.
adb shell "cat /proc/meminfo" > "%filename1%"
adb shell "cat /proc/slabinfo" > "%filename2%"
adb shell "cat /proc/mtk_memcfg/slabtrace" > "%filename3%"
adb shell "dumpsys meminfo" > "%filename4%"
adb shell "cat /sys/kernel/debug/slab/kmalloc-128/alloc_traces" > "%filename5%"
rem ping is used as a portable delay between passes
ping -n 20 127.0.0.1 > nul
goto loop
1.3 日志分析
[411137.591831] [T712169] mali-mem-purge: [name:slab_common&]kmalloc-128 1733601KB 1733660KB
61S 增加 39,036KB == 38MB
[411198.405354] [T700264] init: [name:slab_common&]kmalloc-128 1772667KB 1772696KB
[ 3248.571624] [T323562] kworker/3:27H: [name:slab_common&]kmalloc-128 2085599KB 2085600KB
152S 增加 103,028KB == 100MB
[ 3400.434555] [T201310] VSyncThread_0: [name:slab_common&]kmalloc-128 2188627KB 2188628KB
slabinfo 日志:
可以看出 kmalloc-128 存在内存泄漏。
Slabtrace log:
初步怀疑为pd_dbg_info造成的内存泄漏
1.4 抓取alloc_traces和free_traces
//alloc_traces
3761872 pd_dbg_info+0x168/0x2d4 [pd_dbg_info] age=3948/941474/1383419 pid=152-31633 cpus=0-7
//free_traces
224862 print_out_dwork_fn+0x1d8/0x22c [pd_dbg_info] age=32129/948539/1680953 pid=93-32458 cpus=6-7
pd_dbg_info 函数申请内存的次数达到 3761872 次,而负责释放的函数 print_out_dwork_fn 只释放了 224862 次,存在大量的 slab 内存没有得到释放。
2. 根本原因
调用print_out_dwork_fn工作队列函数释放内存时,会统计这个printed的次数,如果超过了dbg_log_limit,则会启动延迟队列。从log来看明显超过了dbg_log_limit,这就导致大量的slab内存申请后因为延迟队列的原因没有能够得到及时的释放,越来越多最终造成slab内存泄漏。