0. 问题现象

目前协调FAE 从印度前线客户服务中心收集3份日志,初步分析 Slab占用过大3.9G,有Slab内存泄漏的问题,目前需要异常机复现现场问题后确认泄漏的原因。
Slab内存占用高问题,已同平台窗口和性能模块核对(case:ALPS09052161)。目前来看屏幕卡住不动跟WMS关系不大;binder方面看起来不算是完全耗尽卡死,但是可以看出执行都比较缓慢。low memory和 cpu 95% 的高占用是比较可能导致问题的异常点,在这种performance情况下UI是会出现无法及时响应的。Performance team建议:cpu loading重的高负载场景,优化方向只能尽量降低出现的概率,无法完全避免。

1. 问题分析

1.1 打开slabtrace

diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig
index 29e7bba5..5fa4bdd 100644
--- a/arch/arm64/configs/gki_defconfig
+++ b/arch/arm64/configs/gki_defconfig
@@ -46,6 +46,7 @@
 CONFIG_EMBEDDED=y
 # CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLUB_DEBUG=y
 CONFIG_SLAB_FREELIST_RANDOM=y
 CONFIG_SLAB_FREELIST_HARDENED=y
 CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
@@ -65,7 +66,7 @@
 CONFIG_ARM64_MPAM=y
 CONFIG_RANDOMIZE_BASE=y
 # CONFIG_RANDOMIZE_MODULE_REGION_FULL is not set
-CONFIG_CMDLINE="stack_depot_disable=on kasan.stacktrace=off kvm-arm.mode=protected cgroup_disable=pressure"
+CONFIG_CMDLINE="stack_depot_disable=on kasan.stacktrace=off kvm-arm.mode=protected cgroup_disable=pressure cgroup.memory=nokmem slub_debug=OFZPU"
 CONFIG_CMDLINE_EXTEND=y
 # CONFIG_DMI is not set
 CONFIG_HIBERNATION=y
diff --git a/drivers/misc/mediatek/Makefile b/drivers/misc/mediatek/Makefile
index 03747f8..3acb64f 100644
--- a/drivers/misc/mediatek/Makefile
+++ b/drivers/misc/mediatek/Makefile
@@ -98,6 +98,7 @@
 obj-$(CONFIG_USB) += usb/
 obj-$(CONFIG_MTK_WIDEVINE_DRM) += widevine_drm/
 obj-$(CONFIG_MTK_MT6382_BDG) += spi_slave_drv/
+obj-y += mem/
 # BSP.System - 2023.7.1 - add simtray status
 obj-$(CONFIG_SIMTRAY_STATUS) += simtray/
 #add cpumaxfreq
diff --git a/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h b/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h
new file mode 100644
index 0000000..69a58a7
--- /dev/null
+++ b/drivers/misc/mediatek/include/mt-plat/mtk_memcfg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 MediaTek Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __MTK_MEMCFG_H__
+#define __MTK_MEMCFG_H__
+#include <linux/fs.h>
+
+/* late warning flags */
+#define WARN_MEMBLOCK_CONFLICT	(1 << 0)	/* memblock overlap */
+#define WARN_MEMSIZE_CONFLICT	(1 << 1)	/* dram info missing */
+#define WARN_API_NOT_INIT	(1 << 2)	/* API is not initialized */
+
+#define MTK_MEMCFG_MEMBLOCK_PHY 0x1
+#define MTK_MEMCFG_MEMBLOCK_DEBUG 0x2
+
+#define MTK_MEMCFG_LOG_AND_PRINTK(fmt, arg...) pr_info(fmt, ##arg)
+
+extern int slabtrace_open(struct inode *inode, struct file *file);
+
+#define mtk_memcfg_record_freed_reserved(start, end) do {} while (0)
+#define mtk_memcfg_inform_vmpressure() do { } while (0)
+#endif /* end __MTK_MEMCFG_H__ */
diff --git a/drivers/misc/mediatek/mem/Makefile b/drivers/misc/mediatek/mem/Makefile
new file mode 100644
index 0000000..7895b44
--- /dev/null
+++ b/drivers/misc/mediatek/mem/Makefile
@@ -0,0 +1,14 @@
+#
+# Copyright (C) 2015 MediaTek Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+ccflags-y += -I$(srctree)/drivers/misc/mediatek/include
+obj-y += mtk_memcfg.o
diff --git a/drivers/misc/mediatek/mem/mtk_memcfg.c b/drivers/misc/mediatek/mem/mtk_memcfg.c
new file mode 100644
index 0000000..f477203
--- /dev/null
+++ b/drivers/misc/mediatek/mem/mtk_memcfg.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2015 MediaTek Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/mm.h>
+#include <linux/memory.h>
+#include <linux/memblock.h>
+#include <linux/oom.h>
+#include <linux/swap.h>
+#include <linux/sort.h>
+
+#include <asm/setup.h>
+
+#include <mt-plat/mtk_memcfg.h>
+
+#ifdef CONFIG_MTK_AEE_FEATURE
+#include <mt-plat/aee.h>
+#endif
+
+#ifdef CONFIG_SLUB_DEBUG
+/* kernel slabtrace: proc_ops of the "slabtrace" proc entry (seq_file based) */
+static const struct proc_ops proc_slabtrace_operations = {
+	.proc_flags	= PROC_ENTRY_PERMANENT,
+	.proc_open = slabtrace_open,
+	.proc_read = seq_read,
+	.proc_lseek = seq_lseek,
+	.proc_release = single_release,
+};
+
+/* end of kernel slabtrace */
+#endif
+/*
+ * Late initcall: create /proc/mtk_memcfg and, when CONFIG_SLUB_DEBUG is set,
+ * the owner-read-only (0400) "slabtrace" entry below it. Failures are only
+ * logged with pr_info(); the initcall itself always returns 0.
+ */
+static int __init mtk_memcfg_late_init(void)
+{
+	struct proc_dir_entry *dir = proc_mkdir("mtk_memcfg", NULL);
+
+	if (!dir) {
+		pr_info("[%s]: mkdir /proc/mtk_memcfg failed\n", __func__);
+		return 0;
+	}
+
+#ifdef CONFIG_SLUB_DEBUG
+	/* slabtrace - full slub object backtrace */
+	if (!proc_create("slabtrace", 0400, dir,
+			 &proc_slabtrace_operations))
+		pr_info("create slabtrace proc entry failed\n");
+#endif
+
+	return 0;
+}
+
+late_initcall(mtk_memcfg_late_init);
diff --git a/mm/slab.h b/mm/slab.h
index 61d235e..2f48ad0 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -96,6 +96,16 @@
  * Tracking user of a slab.
  */
 #define TRACK_ADDRS_COUNT 16
+
+// #ifdef CONFIG_ARM64
+// #ifndef CONFIG_RANDOMIZE_BASE
+#define MTK_COMPACT_SLUB_TRACK
+#define MTK_MEMCFG_SLABTRACE_CNT 5
+#undef TRACK_ADDRS_COUNT
+#define TRACK_ADDRS_COUNT MTK_MEMCFG_SLABTRACE_CNT
+// #endif
+// #endif
+
 struct track {
 	unsigned long addr;	/* Called from address */
 #ifdef CONFIG_STACKTRACE
diff --git a/mm/slub.c b/mm/slub.c
index a160f2d2..573a695 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -726,6 +726,28 @@
 
 	if (addr) {
 #ifdef CONFIG_STACKTRACE
+#ifdef MTK_COMPACT_SLUB_TRACK
+		unsigned int nr_entries;
+		unsigned long addrs[TRACK_ADDRS_COUNT];
+		int i;
+
+		memset(addrs, 0, sizeof(addrs));
+
+		metadata_access_enable();
+		nr_entries = stack_trace_save(kasan_reset_tag(addrs),
+					      TRACK_ADDRS_COUNT, 3);
+		metadata_access_disable();
+
+		for (i = nr_entries; i < TRACK_ADDRS_COUNT; i++)
+			addrs[i] = 0;
+
+		for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+			if (addrs[i])
+				p->addrs[i] = addrs[i] - MODULES_VADDR;
+			else
+				p->addrs[i] = 0;
+		}
+#else
 		unsigned int nr_entries;
 
 		metadata_access_enable();
@@ -736,6 +758,7 @@
 		if (nr_entries < TRACK_ADDRS_COUNT)
 			p->addrs[nr_entries] = 0;
 #endif
+#endif
 		p->addr = addr;
 		p->cpu = smp_processor_id();
 		p->pid = current->pid;
@@ -763,6 +786,28 @@
 	pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
 	       s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
 #ifdef CONFIG_STACKTRACE
+#ifdef MTK_COMPACT_SLUB_TRACK
+	{
+		int i;
+		unsigned long addrs[TRACK_ADDRS_COUNT];
+
+		/* we store the offset after MODULES_VADDR for
+		 * kernel module and kernel text address
+		 */
+		for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+			if (t->addrs[i])
+				addrs[i] =  MODULES_VADDR + t->addrs[i];
+			else
+				addrs[i] = 0;
+		}
+		for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+			if (addrs[i])
+				pr_err("\t%pS\n", (void *)addrs[i]);
+			else
+				break;
+		}
+	}
+#else
 	{
 		int i;
 		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
@@ -772,6 +817,7 @@
 				break;
 	}
 #endif
+#endif
 }
 
 void print_tracking(struct kmem_cache *s, void *object)
@@ -1474,7 +1520,8 @@
 			 * Avoid enabling debugging on caches if its minimum
 			 * order would increase as a result.
 			 */
-			higher_order_disable = true;
+			// print kmalloc-4096 and kmalloc-8192 alloc-backtrace
+			disable_higher_order_debug = 0;
 			break;
 		default:
 			if (init)
@@ -5092,6 +5139,9 @@
 struct location {
 	unsigned long count;
 	unsigned long addr;
+#ifdef CONFIG_STACKTRACE
+	unsigned long addrs[MTK_MEMCFG_SLABTRACE_CNT]; /* caller address */
+#endif
 	long long sum_time;
 	long min_time;
 	long max_time;
@@ -6286,3 +6336,257 @@
 	return -EIO;
 }
 #endif /* CONFIG_SLUB_DEBUG */
+
+
+/*
+ * Record one tracked object in the sorted location table @t.
+ *
+ * Entries are keyed by a window of up to MTK_MEMCFG_SLABTRACE_CNT return
+ * addresses taken from @track, starting at the frame matching track->addr
+ * (or at frame 0 if none matches). A matching entry has its statistics
+ * updated; otherwise a new entry is inserted at the binary-search position.
+ *
+ * Returns 1 on success, 0 if the table could not be grown. @s is unused.
+ */
+static int mtk_memcfg_add_location(struct loc_track *t, struct kmem_cache *s,
+				const struct track *track)
+{
+	long start, end, pos;
+	struct location *l;
+	/* Stack addresses of the table entry being probed */
+	unsigned long (*caddrs)[MTK_MEMCFG_SLABTRACE_CNT];
+	/* Stack addresses of @track used as the search key */
+	unsigned long taddrs[MTK_MEMCFG_SLABTRACE_CNT]
+		= { [0 ... MTK_MEMCFG_SLABTRACE_CNT - 1] = 0,};
+	unsigned long age = jiffies - track->when;
+	int i, cnt, cmp;
+
+	start = -1;
+	end = t->count;
+	/* find the index of track->addr */
+	for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
+#ifdef MTK_COMPACT_SLUB_TRACK
+		/* compact tracks store offsets relative to MODULES_VADDR */
+		unsigned long addr = (MODULES_VADDR + track->addrs[i]);
+
+		if (track->addr == addr ||
+			((track->addr - 4) == addr))
+#else
+		if ((track->addr == track->addrs[i]) ||
+			(track->addr - 4 == track->addrs[i]))
+#endif
+			break;
+	}
+	/* copy all addrs if we cannot match track->addr */
+	if (i == TRACK_ADDRS_COUNT)
+		i = 0;
+	cnt = min(MTK_MEMCFG_SLABTRACE_CNT, TRACK_ADDRS_COUNT - i);
+#ifdef MTK_COMPACT_SLUB_TRACK
+	{
+		int j = 0;
+		unsigned long addrs[TRACK_ADDRS_COUNT];
+
+		for (j = 0; j < TRACK_ADDRS_COUNT; j++) {
+			/* rebase stored offsets; 0 marks an empty slot */
+			if (track->addrs[j])
+				addrs[j] = MODULES_VADDR + track->addrs[j];
+			else
+				addrs[j] = 0;
+		}
+		memcpy(taddrs, addrs + i, (cnt * sizeof(unsigned long)));
+	}
+#else
+	memcpy(taddrs, track->addrs + i, (cnt * sizeof(unsigned long)));
+#endif
+
+	for ( ; ; ) {
+		pos = start + (end - start + 1) / 2;
+
+		/* Nothing is stored at "end": landing there means insert */
+		if (pos == end)
+			break;
+
+		/* compare once per probe; the result drives both branches */
+		caddrs = &(t->loc[pos].addrs);
+		cmp = memcmp(caddrs, taddrs, sizeof(taddrs));
+		if (!cmp) {
+			l = &t->loc[pos];
+			l->count++;
+			if (track->when) {
+				l->sum_time += age;
+				if (age < l->min_time)
+					l->min_time = age;
+				if (age > l->max_time)
+					l->max_time = age;
+
+				if (track->pid < l->min_pid)
+					l->min_pid = track->pid;
+				if (track->pid > l->max_pid)
+					l->max_pid = track->pid;
+
+				cpumask_set_cpu(track->cpu,
+						to_cpumask(l->cpus));
+			}
+			node_set(page_to_nid(virt_to_page(track)), l->nodes);
+			return 1;
+		}
+
+		if (cmp < 0)
+			end = pos;
+		else
+			start = pos;
+	}
+
+	/* Not found. Insert new tracking element. */
+	if (t->count >= t->max &&
+		!alloc_loc_track(t, 2 * t->max, __GFP_HIGH | __GFP_ATOMIC))
+		return 0;
+
+	l = t->loc + pos;
+	if (pos < t->count)
+		memmove(l + 1, l,
+			(t->count - pos) * sizeof(struct location));
+	t->count++;
+	l->count = 1;
+	l->addr = track->addr;
+	memcpy(l->addrs, taddrs,
+			MTK_MEMCFG_SLABTRACE_CNT * sizeof(unsigned long));
+	l->sum_time = age;
+	l->min_time = age;
+	l->max_time = age;
+	l->min_pid = track->pid;
+	l->max_pid = track->pid;
+	cpumask_clear(to_cpumask(l->cpus));
+	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
+	nodes_clear(l->nodes);
+	node_set(page_to_nid(virt_to_page(track)), l->nodes);
+	return 1;
+}
+
+/* Add each object of @page whose bit is clear in @map to the table @t. */
+static void mtk_memcfg_process_slab(struct loc_track *t, struct kmem_cache *s,
+		struct page *page, enum track_item alloc, unsigned long *map)
+{
+	void *obj, *base = page_address(page);
+
+	bitmap_zero(map, page->objects);
+	__fill_map(map, s, page);
+
+	for_each_object(obj, s, base, page->objects) {
+		if (test_bit(__obj_to_index(s, base, obj), map))
+			continue;
+		mtk_memcfg_add_location(t, s, get_track(s, obj, alloc));
+	}
+}
+
+/*
+ * Print the @alloc call sites of all tracked objects of cache @s to @m and add
+ * its unreclaimable usage to *@slabsize / *@objsize. Always returns 0.
+ */
+static int mtk_memcfg_list_locations(struct kmem_cache *s, struct seq_file *m,
+		enum track_item alloc, unsigned long *slabsize, unsigned long *objsize)
+{
+	unsigned long i, j;
+	struct loc_track t = { 0, 0, NULL };
+	int node;
+	unsigned long *map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
+	struct kmem_cache_node *n;
+
+	if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+				     GFP_KERNEL)) {
+		bitmap_free(map);	/* pairs with bitmap_alloc() */
+		seq_puts(m, "Out of memory\n");
+		return 0;
+	}
+	/* Push back cpu slabs */
+	flush_all(s);
+
+	for_each_kmem_cache_node(s, node, n) {
+		unsigned long flags;
+		struct page *page;
+
+		if (!atomic_long_read(&n->nr_slabs))
+			continue;
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, lru)
+			mtk_memcfg_process_slab(&t, s, page, alloc, map);
+		list_for_each_entry(page, &n->full, lru)
+			mtk_memcfg_process_slab(&t, s, page, alloc, map);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	for (i = 0; i < t.count; i++) {
+		struct location *l = &t.loc[i];
+
+		/* l->count is unsigned long: %lu avoids truncating large values */
+		seq_printf(m, "total_objsize:%lu		count:%lu  ", l->count * s->object_size, l->count);
+		if (s->flags & SLAB_RECLAIM_ACCOUNT) {
+			seq_puts(m, "SLAB_RECLAIMABLE ");
+		} else {
+			seq_puts(m, "SLAB_UNRECLAIMABLE ");
+			*objsize += l->count * s->object_size;
+			*slabsize += l->count * s->size;
+		}
+
+		if (l->addr)
+			seq_printf(m, "%pS", (void *)l->addr);
+		else
+			seq_puts(m, "<not-available>");
+		for (j = 1; j < MTK_MEMCFG_SLABTRACE_CNT; j++)
+			if (l->addrs[j])
+				seq_printf(m, " %pS", (void *)l->addrs[j]);
+		seq_puts(m, "\n");
+	}
+
+	free_loc_track(&t);
+	bitmap_free(map);
+	if (!t.count)
+		seq_puts(m, "No data\n");
+	return 0;
+}
+
+/*
+ * seq_file show callback behind /proc/mtk_memcfg/slabtrace. Walks every cache
+ * on slab_caches under slab_mutex, dumps the allocation call sites of caches
+ * that have SLAB_STORE_USER set and finally prints the accumulated
+ * SLAB_UNRECLAIMABLE totals. @p is unused. Always returns 0.
+ */
+static int mtk_memcfg_slabtrace_show(struct seq_file *m, void *p)
+{
+	struct kmem_cache *s;
+	unsigned long total_objsize = 0;
+	unsigned long total_size = 0;
+
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		unsigned long objsize = 0;
+		unsigned long size = 0;
+
+		seq_printf(m, "======= kmem_cache: %s alloc_calls, objsize=%u =======\n",
+			s->name, s->object_size);
+
+		/* The header is printed for every cache; only tracked ones are dumped */
+		if (!(s->flags & SLAB_STORE_USER))
+			continue;
+
+		mtk_memcfg_list_locations(s, m, TRACK_ALLOC, &size, &objsize);
+
+		/* unsigned long needs %lu: %d truncates totals of 2 GiB and more */
+		seq_printf(m, "======= kmem_cache: %s, total_size=%lu total_objsize=%lu=======\n",
+			s->name, size, objsize);
+		total_size += size;
+		total_objsize += objsize;
+	}
+	mutex_unlock(&slab_mutex);
+
+	seq_puts(m, " =======\n");
+	seq_printf(m, "SLAB_UNRECLAIMABLE : total_size =%lu =======\n", total_size);
+	seq_printf(m, "SLAB_UNRECLAIMABLE : total_objsize =%lu =======\n", total_objsize);
+
+	return 0;
+}
+/* proc_open handler: single_open() seq_file shown by mtk_memcfg_slabtrace_show() */
+int slabtrace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtk_memcfg_slabtrace_show, NULL);
+}

1.2 抓取meminfo和slabtrace等日志

脚本如下:

@echo off
rem Periodically snapshot kernel and userspace memory statistics from the
rem device attached through adb. One set of files is written to .\meminfo
rem roughly every 20 seconds; stop the capture with Ctrl+C.
rem NOTE: /proc/mtk_memcfg/slabtrace only exists with the slabtrace patch from
rem section 1.1, and reading debugfs presumably needs a rooted adb shell.
set "root_DIR=%CD%\meminfo"

if not exist "%root_DIR%" (
    mkdir "%root_DIR%"
) else (
    echo 目录已存在:%root_DIR%
)

:loop
rem Build a yyyyMMdd_HHmmss stamp. The built-in time variable carries no date,
rem so captures running for more than a day could reuse file names, and its
rem format (leading space before 10:00, decimal separator) is locale dependent.
for /f %%t in ('powershell -NoProfile -Command "Get-Date -Format yyyyMMdd_HHmmss"') do set "timestamp=%%t"

set "filename1=%root_DIR%\meminfo_%timestamp%.txt"
set "filename2=%root_DIR%\slabinfo_%timestamp%.txt"
set "filename3=%root_DIR%\slabtrace_%timestamp%.txt"
set "filename4=%root_DIR%\dumpsys_meminfo_%timestamp%.txt"
set "filename5=%root_DIR%\kmalloc_128_trace_%timestamp%.txt"
echo Log starting.
adb shell "cat /proc/meminfo" > "%filename1%"
adb shell "cat /proc/slabinfo" > "%filename2%"
adb shell "cat /proc/mtk_memcfg/slabtrace" > "%filename3%"
adb shell "dumpsys meminfo" > "%filename4%"
adb shell "cat /sys/kernel/debug/slab/kmalloc-128/alloc_traces" > "%filename5%"
rem ping serves as a portable sleep of about 20 seconds between snapshots.
ping -n 20 127.0.0.1 > nul
goto loop

1.3 日志分析

[411137.591831] [T712169] mali-mem-purge: [name:slab_common&]kmalloc-128          1733601KB    1733660KB
61S 增加  39,036KB   == 38MB
[411198.405354] [T700264] init: [name:slab_common&]kmalloc-128          1772667KB    1772696KB
 
 
[ 3248.571624] [T323562] kworker/3:27H: [name:slab_common&]kmalloc-128          2085599KB    2085600KB
152S 增加  103,028KB   ==   100MB
[ 3400.434555] [T201310] VSyncThread_0: [name:slab_common&]kmalloc-128          2188627KB    2188628KB

slabinfo 日志:
slabinfo_20241230.png
可以看出kmalloc-128存在内存泄漏

Slabtrace log:
slabtrace_20241230.png

初步怀疑为pd_dbg_info造成的内存泄漏

1.4 抓取alloc_traces和free_traces

//alloc_traces
3761872 pd_dbg_info+0x168/0x2d4 [pd_dbg_info] age=3948/941474/1383419 pid=152-31633 cpus=0-7

//free_traces
224862 print_out_dwork_fn+0x1d8/0x22c [pd_dbg_info] age=32129/948539/1680953 pid=93-32458 cpus=6-7

pd_dbg_info函数申请内存的次数达到3761872次,而负责释放的函数print_out_dwork_fn只释放了224862次,存在大量的slab内存没有得到释放

2. 根本原因

slab_memleak_pd_dbg_info.jpeg
调用print_out_dwork_fn工作队列函数释放内存时,会统计已打印(printed)的次数,如果超过了dbg_log_limit,则会启动延迟队列。从log来看打印次数明显超过了dbg_log_limit,这就导致大量slab内存申请后因为延迟队列的原因得不到及时释放,未释放的对象越积越多,最终表现为slab内存泄漏。