一、问题背景

测试DDR TT和Reboot测试专项过程中出现机器进dump,dump根因都相同

二、问题分析

2.1 panic现场

[    8.619016][  T138] ufshcd-qcom 4804000.ufshc: ufshcd_complete_dev_init fDeviceInit was not cleared by the device

[   18.838491][    T1] init: Wait for partitions returned after 10010ms
[   18.844889][    T1] init: bool android::init::BlockDevInitializer::InitDevices(std::set<std::string>): partition(s) not found after polling timeout: boot_a, dtbo_a, metadata, recovery_a, super, vbmeta_a, vbmeta_system_a
[   18.864489][    T1] init: Failed to mount required partitions early ...
[   18.871136][    T1] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00007f00
[   18.879490][    T1] CPU: 6 PID: 1 Comm: init Tainted: G        WC OE     5.15.167-android13-8-00009-g705f06af57bc-ab13257286 #1
[   18.890966][    T1] Hardware name: Qualcomm Technologies, Inc. KHAJE IDP nopmi creek (DT)
[   18.899138][    T1] Call trace:
[   18.902278][    T1]  dump_backtrace.cfi_jt+0x0/0x8
[   18.907072][    T1]  dump_stack_lvl+0x80/0xb8
[   18.911430][    T1]  panic+0x190/0x4b4
[   18.915179][    T1]  do_exit+0xb94/0x1054
[   18.919185][    T1]  do_group_exit+0x144/0x174
[   18.923628][    T1]  do_group_exit+0x0/0x174
[   18.927898][    T1]  invoke_syscall+0x60/0x150
[   18.932339][    T1]  el0_svc_common+0xb8/0xf8
[   18.936694][    T1]  do_el0_svc+0x28/0x98
[   18.940698][    T1]  el0_svc+0x24/0x84
[   18.944449][    T1]  el0t_64_sync_handler+0x88/0xec
[   18.949326][    T1]  el0t_64_sync+0x1b8/0x1bc
[   18.953681][    T1] SMP: stopping secondary CPUs

从这个地方来看,死机的直接原因是发生了panic,init进程被杀掉。

/dev/block/... 设备没创建(UFS 初始化失败),导致mount 超时。

所以问题点在于ufs没有初始化成功,具体出错位置在函数ufshcd_complete_dev_init。

2.2 ufshcd_complete_dev_init 加debug patch

diff --git a/drivers/scsi/mi_ufs/mi_ufshcd.c b/drivers/scsi/mi_ufs/mi_ufshcd.c
index c704ef3..fe57a2b 100644
--- a/drivers/scsi/mi_ufs/mi_ufshcd.c
+++ b/drivers/scsi/mi_ufs/mi_ufshcd.c
@@ -94,7 +94,7 @@
 #define UFSHCD_REF_CLK_GATING_WAIT_US 0xFF /* microsecs */
 
 /* Polling time to wait for fDeviceInit */
-#define FDEVICEINIT_COMPL_TIMEOUT 1500 /* millisecs */
+#define FDEVICEINIT_COMPL_TIMEOUT 5000 /* millisecs */
 
 #define wlun_dev_to_hba(dv) shost_priv(to_scsi_device(dv)->host)
 
@@ -4599,6 +4599,12 @@
 	int err;
 	bool flag_res = true;
 	ktime_t timeout;
+	u64 time_before, time_after, current_time, before_send, before_usleep, after_usleep;
+	u64 fDeviceInit_time = 0;
+	u64 fDeviceInit_threshold = 3500;           /* 3500ms */
+	unsigned long usleep_min = 5*1000*1000;     /* 5s */
+	unsigned long usleep_max = 6*1000*1000;     /* 6s */
+	int count = 0;
 
 	err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG,
 		QUERY_FLAG_IDN_FDEVICEINIT, 0, NULL);
@@ -4611,13 +4617,26 @@
 
 	/* Poll fDeviceInit flag to be cleared */
 	timeout = ktime_add_ms(ktime_get(), FDEVICEINIT_COMPL_TIMEOUT);
+	time_before = ktime_get_ns();
 	do {
+		count++;
+		before_send = ktime_get_ns();
 		err = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG,
 					QUERY_FLAG_IDN_FDEVICEINIT, 0, &flag_res);
+		current_time = ktime_get_ns();
+		pr_err("fDeviceInit value %d before_send %llu current_time %lluns time %lluns count %d\n", flag_res, before_send, current_time, current_time - before_send, count);
 		if (!flag_res)
 			break;
+
+	        before_usleep = ktime_get_ns();
 		usleep_range(500, 1000);
+		after_usleep = ktime_get_ns();
+		pr_err("fDeviceInit before_usleep %lluns after_usleep %lluns usleep %lluns count %d\n", before_usleep, after_usleep, after_usleep - before_usleep, count);
 	} while (ktime_before(ktime_get(), timeout));
+	time_after = ktime_get_ns();
+
+	fDeviceInit_time = current_time - time_before;
+	pr_err("fDeviceInit time %lluns, timeout limit %dms, threshold %llums time_before %lluns time_after %lluns time %lluns\n", fDeviceInit_time, FDEVICEINIT_COMPL_TIMEOUT, fDeviceInit_threshold, time_before, time_after, time_after - time_before);
 
 	if (err) {
 		dev_err(hba->dev,
@@ -4629,6 +4648,13 @@
 				__func__);
 		err = -EBUSY;
 	}
+
+	if (fDeviceInit_time/1000/1000 > fDeviceInit_threshold) {
+		err = -EBUSY;
+		usleep_range(usleep_min, usleep_max);
+		pr_err("fDeviceInit time exceeds %llums\n", fDeviceInit_threshold);
+		BUG();
+	}
 out:
 	return err;
 }
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 86c10ed..b62eef2 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -832,7 +832,7 @@
 		blist_flags_t *bflags, int async)
 {
 	int ret;
-
+	struct task_struct *task = get_current();
 	/*
 	 * XXX do not save the inquiry, since it can change underneath us,
 	 * save just vendor/model/rev.
@@ -932,10 +932,10 @@
 		sdev->sdtr = 1;
 
 	sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d "
-			"ANSI: %d%s\n", scsi_device_type(sdev->type),
+			"ANSI: %d%s comm: %s pid: %d tgid: %d\n", scsi_device_type(sdev->type),
 			sdev->vendor, sdev->model, sdev->rev,
 			sdev->inq_periph_qual, inq_result[2] & 0x07,
-			(inq_result[3] & 0x0f) == 1 ? " CCS" : "");
+			(inq_result[3] & 0x0f) == 1 ? " CCS" : "", task->comm, task->pid, task->tgid);
 
 	if ((sdev->scsi_level >= SCSI_2) && (inq_result[7] & 2) &&
 	    !(*bflags & BLIST_NOTQ)) {

在这个函数ufshcd_complete_dev_init 所涉及的函数跳转都加上时间戳的计算,来判定出耗时出现在哪些地方!并且当出现问题时,主动执行BUG() 函数触发进入dump,抓取fulldump。

2.3 新dump分析

	Line 4775: [    2.247957][    T8] fDeviceInit before_usleep 2244560207ns after_usleep 2245576093ns usleep 1015886ns count 12
	Line 4777: [    2.248998][    T8] fDeviceInit value 1 before_send 2245580936 current_time 2246616770ns time 1035834ns count 13
	Line 4778: [    2.250016][    T8] fDeviceInit before_usleep 2246620780ns after_usleep 2247634999ns usleep 1014219ns count 13
	Line 4779: [    2.251193][    T8] fDeviceInit value 1 before_send 2247638488 current_time 2248812395ns time 1173907ns count 14
	Line 4780: [    2.252214][    T8] fDeviceInit before_usleep 2248817655ns after_usleep 2249832916ns usleep 1015261ns count 14
	Line 4781: [    2.253251][    T8] fDeviceInit value 1 before_send 2249836874 current_time 2250869999ns time 1033125ns count 15
	Line 4782: [    2.254268][    T8] fDeviceInit before_usleep 2250873749ns after_usleep 2251887603ns usleep 1013854ns count 15
	Line 4789: [    7.139614][    T8] fDeviceInit value 0 before_send 7136052914 current_time 7137233070ns time 1180156ns count 16
	Line 4791: [    7.150431][    T8] fDeviceInit time 4917530779ns, timeout limit 5000ms, threshold 3500ms time_before 2219702291ns time_after 7148050153ns time 4928347862ns
	Line 4795: [   12.766765][    T8] fDeviceInit time exceeds 3500ms

在循环15次后,第16次发生了异常,我们可以看到时间戳从2.25s直接跳到了7.15s,大约5s的时间内没有任何日志输出。

第二份日志

	Line 4815: [    2.166157][    T8] fDeviceInit value 1 before_send 2159485520 current_time 2160663749ns time 1178229ns count 19
	Line 4816: [    2.167178][    T8] fDeviceInit before_usleep 2160669166ns after_usleep 2161684999ns usleep 1015833ns count 19
	Line 4817: [    2.168354][    T8] fDeviceInit value 1 before_send 2161689113 current_time 2162861301ns time 1172188ns count 20
	Line 4818: [    2.169375][    T8] fDeviceInit before_usleep 2162866301ns after_usleep 2163882655ns usleep 1016354ns count 20
	Line 4819: [    2.170549][    T8] fDeviceInit value 1 before_send 2163886666 current_time 2165056509ns time 1169843ns count 21
	Line 4825: [    7.158296][    T8] fDeviceInit before_usleep 7151788382ns after_usleep 7152803643ns usleep 1015261ns count 21
	Line 4827: [    7.172870][    T8] fDeviceInit time 45096093ns, timeout limit 5000ms, threshold 3500ms time_before 2119960416ns time_after 7167376924ns time 5047416508ns
	Line 4829: [    7.184458][    T8] ufshcd-qcom 4804000.ufshc: ufshcd_complete_dev_init fDeviceInit was not cleared by the device

在循环21次的时候出现问题,本应该成对出现的第21次日志打印,前者出现在2.17s,后者出现在7.15s

但是看这个usleep(也就是内部函数执行的耗时)都是1ms左右,那说明此时并不是函数执行本身导致的阻塞!

2.4 trace32分析

通过查看各个cpu的tasks_sched_stats,发现了如下一个线程出现了很明显的异常,执行时间大约4386ms。

kworker/6:1            89       7.138401166       0.000000000       4.391132095               92     120        D       2.751619695       7.138425541     4386.805846000

查询这个栈

__switch_to(prev = 0xFFFFFF80010B0000, next = 0xFFFFFF8000208000)
  prev = 0xFFFFFF80010B0000
  next = 0xFFFFFF8000208000
  last = 0x1B1507D9
context_switch(inline)
  rq = 0xFFFFFF81791D7100
  prev = 0xFFFFFF80010B0000
  next = 0xFFFFFF8000208000
  rf = 0xFFFFFFC008463D68
__schedule(sched_mode = ?)
  rf = (flags = 0x0, cookie = (), clock_update_flags = 0x4)
  switch_count = 0xFFFFFF80010B0708
  next = 0xFFFFFF8000208000
  prev = 0xFFFFFF80010B0000
  rq = 0xFFFFFF81791D7100
schedule()
  tsk = 0xFFFFFF80010B0000
worker_thread(__worker = 0xFFFFFF800012D500)
  __worker = 0xFFFFFF800012D500              
  worker = 0x0
  pool = 0xFFFFFF81791D6A00
kthread(_create = 0xFFFFFF8001066100)
  _create = 0xFFFFFF8001066100
  create = 0x0
  data = 0xFFFFFF800012D500
  threadfn = 0x0
  ret = 0x0012D500
  self = 0xFFFFFF800014D900
ret_from_fork(asm)

当前这个 kworker 正在执行一个工作项(work item),并在某个阶段进入了 schedule() 阻塞,具体工作项信息保存在 __worker 指向的 struct worker 结构中。

struct worker * = 0xFFFFFF800012D500

重点字段分析

current_work = 0x0

current_func = 0x0

last_func = 0xFFFFFFEBF6352CA8

含义如下:

字段

含义

current_work = 0x0

当前没有指向具体的 work_struct,说明工作已执行完或栈被破坏

current_func = 0x0

同上,func 为 null,说明当前没有活跃任务

last_func = 0xFFFFFFEBF6352CA8

✅ 最后一次执行的函数地址 —— 关键定位点!

查看是哪个函数?

(work_func_t) [ND:0x0::0xFFFFFF800012D598] (*(((struct worker *)0xFFFFFF800012D500))).last_func = 0xFFFFFFEBF6352CA8 = kfree_rcu_monitor.cfi_jt

这说明:

当前 kworker/6:1 的最后一次有效执行函数是 kfree_rcu_monitor(),它是 kfree_rcu()/call_rcu() 用于回收内存的后台回调。

但是你系统在 [2.7s → 7.1s] 卡住,不太可能是单纯一个 RCU 的 kfree 导致所有线程死锁。这提示我们:

last_func = kfree_rcu_monitor() 很可能是:栈帧/worker 数据被破坏后残留的假地址;

此时我们查看一下寄存器信息:

关键寄存器分析

  1. 当前栈指针

SP = X29 = 0xFFFFFFC008463DE0

表示当前任务栈顶位于此处,你已经知道它正在 schedule() 中。

  2. X30 = 0x0

这是 Link Register(返回地址)为空,说明调用者已经退栈,或因为调度进入 schedule(),LR 未再写入。

  1. 程序计数器 PC

PC = 0xFFFFFFEBF6356334 → schedule+0x150

说明当前任务正卡在 schedule()

  4. ELR = 0xFFFFFFEBF1FC5D50

这是 Exception Link Register,也就是中断/异常返回地址。

极可能是:

调度器中断切换上下文时保存的 被中断线程的返回地址

-> 反查 ELR 指向的函数

而 mi_ufshcd.c 第8279行正是我们加debug patch的那个函数。

到这里我们可以证实一点:那就是ufshcd_complete_dev_init 函数在执行过程中被调度出去了,因为ELR里保存了该函数的地址!

2.5 查看CPU栈

0xffffffc00803b718:('vsnprintf', 160)
0xffffffc00803b798:('sprintf', 96)
0xffffffc00803b858:('number', 312)
0xffffffc00803b8c8:('vsnprintf', 160)
0xffffffc00803b8f8:('vsnprintf', 188)
0xffffffc00803b938:('vsnprintf', 160)
0xffffffc00803b968:('vsnprintf', 188)
0xffffffc00803b998:('persistent_ram_write', 564)
0xffffffc00803b9a8:('sprintf', 96)
0xffffffc00803b9f8:('ramoops_pstore_write', 540)
0xffffffc00803ba00:('dummy_clock_read[jt]', 0)
0xffffffc00803ba28:('ramoops_pstore_write[jt]', 0)
0xffffffc00803ba48:('qcom_geni_serial_poll_tx_done', 184)
0xffffffc00803ba78:('pstore_console_write', 372)
0xffffffc00803ba98:('ramoops_pstore_write[jt]', 0)
0xffffffc00803bab8:('qcom_geni_serial_console_write', 1080)
0xffffffc00803bb28:('console_unlock', 1436)
0xffffffc00803bb40:('pstore_console_write[jt]', 0)
0xffffffc00803bc28:('vprintk_emit', 196)
0xffffffc00803bc68:('vprintk_emit', 232)
0xffffffc00803bcb8:('vprintk[jt]', 224)
0xffffffc00803bd68:('_printk', 88)
0xffffffc00803bd98:('__secondary_switched', 156)
0xffffffc00803be18:('dump_backtrace', 392)
0xffffffc00803be28:('__secondary_switched', 156)
0xffffffc00803be58:('show_regs', 48)
0xffffffc00803bec8:('do_handle_IPI', 712)
0xffffffc00803bef8:('ipi_handler', 32)
0xffffffc00803bf00:('timer_irq_ack[jt]', 0)
0xffffffc00803bf38:('handle_percpu_devid_irq', 192)
0xffffffc00803bf48:('handle_domain_irq', 176)
0xffffffc00803bf58:('handle_domain_irq', 216)
0xffffffc00803bf90:('handle_percpu_devid_irq[jt]', 0)
0xffffffc00803bfa8:('gic_handle_irq.34242', 88)
0xffffffc00803bfc0:('cpuidle_enter_state', 948)
0xffffffc00803bfd0:('gic_handle_irq.34242[jt]', 0)
0xffffffc00803bfe8:('call_on_irq_stack', 60)

该CPU在处理console输出,查看所有CPU后发现,8个CPU有7个CPU在调度串口输出

	Line 5832: Core_0 call stack :
	Line 5846: 0xffffffc008003a48:('qcom_geni_serial_poll_tx_done', 184)
	Line 5869: Core_1 call stack :
	Line 5883: 0xffffffc00800ba48:('qcom_geni_serial_poll_tx_done', 184)
	Line 5909: Core_2 call stack :
	Line 5923: 0xffffffc008013a48:('qcom_geni_serial_poll_tx_done', 184)
	Line 5948: Core_3 call stack :
	Line 5962: 0xffffffc00801ba48:('qcom_geni_serial_poll_tx_done', 184)
	Line 5987: Core_4 call stack :
	Line 6001: 0xffffffc008023a48:('qcom_geni_serial_poll_tx_done', 184)
	Line 6026: Core_5 call stack :
	Line 6040: 0xffffffc00802ba48:('qcom_geni_serial_poll_tx_done', 184)
	Line 6065: Core_6 call stack :
	Line 6122: Core_7 call stack :
	Line 6136: 0xffffffc00803ba48:('qcom_geni_serial_poll_tx_done', 184)

所以问题分析到现在,已经很明了了。

三、根本原因

  1. ufs函数执行的时候被串口输出调度出去

  2. 串口输出 调度优先级很高,输出时,会执行console_unlock(),在持锁状态下运行,不允许调度,一直执行串口输出,直到结束;

  3. 阻塞式串口写函数(qcom_geni_serial_poll_tx_done)会主动 spin 等待硬件完成,造成长时间占用 CPU。