一、问题背景
- 老化测试
- 卡android
- 组合键手动进dump
{% btns rounded grid1 %}
{% cell 下载日志1, https://hexoimg.oss-cn-shanghai.aliyuncs.com/blog/25/4/parser_output_android_stability_037.zip, anzhiyufont anzhiyu-icon-bolt %}
{% endbtns %}
二、日志分析
2.1 查看dmesg_TZ.txt
[54150.907856][ T792] ------ collect D&R-state processes info before long comb key ------
[54150.907858][ T792] task PC stack pid father
[54150.907915][ T792] task:g_reclaim_threa state:D
[54150.913568][ T794] mtdoops_do_dump start , count = 35 , page = 2, reason = 7, dump_count = 1
[54150.918190][ T1] init: Control message: Could not find 'android.frameworks.sensorservice@1.0::ISensorManager/default' for ctl.interface_start from pid: 751 (/system/system_ext/bin/hwservicemanager)
[54150.920259][ T792] stack:0 pid:122 ppid:2 flags:0x00000008
[54150.921584][ T794] mtdoops_do_dump pmsg paddr = 0x000000006c424807
[54150.921755][ T794] [LIUQI] reason=7
[54150.921763][ T794] mtdoops_do_dump() finish
[54150.921971][T19075] [LIUQI] panic=0
[54150.921975][T19075] _block2mtd_write: start len = 2097152, mapping_index= 1024
[54150.923302][T19075] _block2mtd_write: finish all
[54150.938144][ T747] servicemanager: Caller(pid=1581,uid=1000,sid=u:r:hal_touchfeature_xiaomi_default:s0) Since 'android.frameworks.sensorservice.ISensorManager/default' could not be found trying to start it as a lazy AIDL service. (if it's not configured to be a lazy service, it may be stuck starting or still starting).
[54150.945818][ T792] Call trace:
[54150.945823][ T792] __switch_to+0x164/0x308
[54150.964311][ T1] init: Control message: Could not find 'aidl/android.frameworks.sensorservice.ISensorManager/default' for ctl.interface_start from pid: 747 (/system/bin/servicemanager)
[54150.970439][ T792] __schedule+0x624/0xa10
[54150.976974][T29122] servicemanager: Caller(pid=1581,uid=1000,sid=u:r:hal_touchfeature_xiaomi_default:s0) Tried to start aidl service android.frameworks.sensorservice.ISensorManager/default as a lazy service, but was unable to. Usually this happens when a service is not installed, but if the service is intended to be used as a lazy service, then it may be configured incorrectly.
[54150.980509][ T792] schedule+0x84/0xf0
[54151.095051][ T792] schedule_preempt_disabled+0x28/0x44
[54151.100443][ T792] kthread+0xb8/0x154
[54151.104364][ T792] ret_from_fork+0x10/0x20
[54151.108996][ T792] task:irq/252-usbin-p state:D stack:0 pid:959 ppid:2 flags:0x00000008
[54151.118156][ T792] Call trace:
[54151.121376][ T792] __switch_to+0x164/0x308
[54151.125735][ T792] __schedule+0x624/0xa10
[54151.129985][ T792] schedule+0x84/0xf0
[54151.133891][ T792] schedule_preempt_disabled+0x28/0x44
[54151.139277][ T792] __mutex_lock+0x414/0xdec
[54151.143691][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.148771][ T792] mutex_lock+0x40/0xec
[54151.152844][ T792] vote+0x58/0x32c [qpnp_smb5_main]
[54151.158093][ T792] smb5_usb_plugin_irq_handler+0x164/0xdc8 [qpnp_smb5_main]
[54151.165428][ T792] irq_thread_fn+0x44/0xa4
[54151.169783][ T792] irq_thread+0x164/0x290
[54151.174038][ T792] kthread+0x10c/0x154
[54151.178029][ T792] ret_from_fork+0x10/0x20
[54151.182379][ T792] task:irq/268-typec-a state:D stack:0 pid:970 ppid:2 flags:0x00000008
[54151.191509][ T792] Call trace:
[54151.194717][ T792] __switch_to+0x164/0x308
[54151.199074][ T792] __schedule+0x624/0xa10
[54151.203348][ T792] schedule+0x84/0xf0
[54151.207235][ T792] schedule_preempt_disabled+0x28/0x44
[54151.212641][ T792] __mutex_lock+0x414/0xdec
[54151.217044][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.222086][ T792] mutex_lock+0x40/0xec
[54151.226142][ T792] vote+0x58/0x32c [qpnp_smb5_main]
[54151.231376][ T792] smb5_typec_attach_detach_irq_handler+0x84/0x108c [qpnp_smb5_main]
[54151.239425][ T792] irq_thread_fn+0x44/0xa4
[54151.243814][ T792] irq_thread+0x164/0x290
[54151.248047][ T792] kthread+0x10c/0x154
[54151.252072][ T792] ret_from_fork+0x10/0x20
[54151.256554][ T792] task:android.hardwar state:D stack:0 pid:1445 ppid:1 flags:0x04000009
[54151.265706][ T792] Call trace:
[54151.268894][ T792] __switch_to+0x164/0x308
[54151.273291][ T792] __schedule+0x624/0xa10
[54151.277577][ T792] schedule+0x84/0xf0
[54151.281491][ T792] schedule_preempt_disabled+0x28/0x44
[54151.286880][ T792] __mutex_lock+0x414/0xdec
[54151.291326][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.296383][ T792] mutex_lock+0x40/0xec
[54151.300456][ T792] get_effective_result+0x28/0x74 [qpnp_smb5_main]
[54151.307004][ T792] smblib_get_prop_batt_status+0x258/0x514 [qpnp_smb5_main]
[54151.314300][ T792] smb5_batt_get_prop+0x54/0x25c [qpnp_smb5_main]
[54151.320734][ T792] power_supply_get_property+0x40/0x6c
[54151.326131][ T792] power_supply_show_property+0x8c/0x28c
[54151.331671][ T792] dev_attr_show+0x38/0x74
[54151.336037][ T792] sysfs_kf_seq_show+0xb4/0x130
[54151.340827][ T792] kernfs_seq_show+0x4c/0x60
[54151.345372][ T792] seq_read_iter+0x158/0x4ec
[54151.349874][ T792] kernfs_fop_read_iter+0x68/0x1bc
[54151.354966][ T792] vfs_read+0x234/0x2c8
[54151.359028][ T792] ksys_read+0x78/0xe8
[54151.363054][ T792] __arm64_sys_read+0x1c/0x2c
[54151.367626][ T792] invoke_syscall+0x58/0x114
[54151.372164][ T792] el0_svc_common+0xb4/0xfc
[54151.376570][ T792] do_el0_svc+0x24/0x84
[54151.380659][ T792] el0_svc+0x2c/0x90
[54151.384462][ T792] el0t_64_sync_handler+0x68/0xb4
[54151.389426][ T792] el0t_64_sync+0x1a4/0x1a8
[54151.393986][ T792] task:hvdcp_opti state:D stack:0 pid:1954 ppid:1 flags:0x04000000
[54151.403159][ T792] Call trace:
[54151.406376][ T792] __switch_to+0x164/0x308
[54151.410721][ T792] __schedule+0x624/0xa10
[54151.415009][ T792] schedule+0x84/0xf0
[54151.418942][ T792] schedule_preempt_disabled+0x28/0x44
[54151.424299][ T792] __mutex_lock+0x414/0xdec
[54151.428697][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.433752][ T792] mutex_lock+0x40/0xec
[54151.437800][ T792] vote+0x58/0x32c [qpnp_smb5_main]
[54151.443136][ T792] smb5_iio_set_prop+0xb8/0x774 [qpnp_smb5_main]
[54151.449513][ T792] smb5_write_raw+0x18/0x28 [qpnp_smb5_main]
[54151.455696][ T792] iio_write_channel_info+0x13c/0x1b8
[54151.460974][ T792] dev_attr_store+0x30/0x48
[54151.465393][ T792] sysfs_kf_write+0x54/0x6c
[54151.469823][ T792] kernfs_fop_write_iter+0x104/0x1a4
[54151.475088][ T792] vfs_write+0x244/0x2e0
[54151.479305][ T792] ksys_write+0x78/0xe8
[54151.483381][ T792] __arm64_sys_write+0x1c/0x2c
[54151.488084][ T792] invoke_syscall+0x58/0x114
[54151.492592][ T792] el0_svc_common+0xb4/0xfc
[54151.497016][ T792] do_el0_svc+0x24/0x84
[54151.501088][ T792] el0_svc+0x2c/0x90
[54151.504911][ T792] el0t_64_sync_handler+0x68/0xb4
[54151.509838][ T792] el0t_64_sync+0x1a4/0x1a8
[54151.514518][ T792] task:kworker/1:3 state:D stack:0 pid:24305 ppid:2 flags:0x00000008
[54151.523654][ T792] Workqueue: events status_change_work [qpnp_smb5_main]
[54151.530670][ T792] Call trace:
[54151.533911][ T792] __switch_to+0x164/0x308
[54151.538281][ T792] __schedule+0x624/0xa10
[54151.542560][ T792] schedule+0x84/0xf0
[54151.546467][ T792] schedule_preempt_disabled+0x28/0x44
[54151.551919][ T792] __mutex_lock+0x414/0xdec
[54151.556412][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.561462][ T792] mutex_lock+0x40/0xec
[54151.565672][ T792] rerun_election+0x24/0xcc [qpnp_smb5_main]
[54151.571699][ T792] pl_disable_vote_callback+0x2dc/0x7b4 [qpnp_smb5_main]
[54151.578807][ T792] rerun_election+0xa4/0xcc [qpnp_smb5_main]
[54151.584830][ T792] pl_fcc_vote_callback+0x1b8/0x28c [qpnp_smb5_main]
[54151.591614][ T792] rerun_election+0xa4/0xcc [qpnp_smb5_main]
[54151.597607][ T792] status_change_work+0x38/0x764 [qpnp_smb5_main]
[54151.604085][ T792] process_one_work+0x240/0x504
[54151.608881][ T792] worker_thread+0x264/0x480
[54151.613372][ T792] kthread+0x10c/0x154
[54151.617403][ T792] ret_from_fork+0x10/0x20
[54151.621746][ T792] task:kworker/1:0 state:D stack:0 pid:18811 ppid:2 flags:0x00000008
[54151.630870][ T792] Workqueue: events xm_charge_work [xm_smart_chg]
[54151.637234][ T792] Call trace:
[54151.640515][ T792] __switch_to+0x164/0x308
[54151.644870][ T792] __schedule+0x624/0xa10
[54151.649156][ T792] schedule+0x84/0xf0
[54151.653072][ T792] schedule_preempt_disabled+0x28/0x44
[54151.658457][ T792] __mutex_lock+0x414/0xdec
[54151.662897][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.667945][ T792] mutex_lock+0x40/0xec
[54151.671995][ T792] vote+0x58/0x32c [qpnp_smb5_main]
[54151.677240][ T792] monitor_outdoor_charging+0xd4/0x1cc [xm_smart_chg]
[54151.683942][ T792] xm_charge_work+0x38/0x244 [xm_smart_chg]
[54151.689780][ T792] process_one_work+0x240/0x504
[54151.694539][ T792] worker_thread+0x264/0x480
[54151.699062][ T792] kthread+0x10c/0x154
[54151.703055][ T792] ret_from_fork+0x10/0x20
[54151.707450][ T792] task:kworker/0:3 state:D stack:0 pid:22483 ppid:2 flags:0x00000008
[54151.716547][ T792] Workqueue: events status_change_work [qpnp_smb5_main]
[54151.723579][ T792] Call trace:
[54151.726785][ T792] __switch_to+0x164/0x308
[54151.731129][ T792] __schedule+0x624/0xa10
[54151.735428][ T792] schedule+0x84/0xf0
[54151.739349][ T792] schedule_preempt_disabled+0x28/0x44
[54151.744731][ T792] __mutex_lock+0x414/0xdec
[54151.749188][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.754251][ T792] mutex_lock+0x40/0xec
[54151.758329][ T792] vote+0x58/0x32c [qpnp_smb5_main]
[54151.763606][ T792] usb_icl_vote_callback+0x54/0x268 [qpnp_smb5_main]
[54151.770316][ T792] rerun_election+0xa4/0xcc [qpnp_smb5_main]
[54151.776306][ T792] smblib_get_prop_batt_status+0x418/0x514 [qpnp_smb5_main]
[54151.783632][ T792] smb5_batt_get_prop+0x54/0x25c [qpnp_smb5_main]
[54151.790068][ T792] power_supply_get_property+0x40/0x6c
[54151.795473][ T792] pl_fv_vote_callback+0x12c/0x1e0 [qpnp_smb5_main]
[54151.802100][ T792] vote+0x31c/0x32c [qpnp_smb5_main]
[54151.807402][ T792] status_change_work+0xe9c/0x25fc [qpnp_smb5_main]
[54151.813974][ T792] process_one_work+0x240/0x504
[54151.818826][ T792] worker_thread+0x344/0x480
[54151.823356][ T792] kthread+0x10c/0x154
[54151.827376][ T792] ret_from_fork+0x10/0x20
[54151.831703][ T792] task:kworker/6:0 state:D stack:0 pid:30801 ppid:2 flags:0x00000008
[54151.840852][ T792] Workqueue: events power_supply_changed_work
[54151.846853][ T792] Call trace:
[54151.850061][ T792] __switch_to+0x164/0x308
[54151.854387][ T792] __schedule+0x624/0xa10
[54151.858651][ T792] schedule+0x84/0xf0
[54151.862541][ T792] schedule_preempt_disabled+0x28/0x44
[54151.867930][ T792] __mutex_lock+0x414/0xdec
[54151.872332][ T792] __mutex_lock_slowpath+0x14/0x24
[54151.877394][ T792] mutex_lock+0x40/0xec
[54151.881448][ T792] get_client_vote+0x34/0x68 [qpnp_smb5_main]
[54151.887593][ T792] smb5_batt_get_prop+0x21c/0x25c [qpnp_smb5_main]
[54151.894122][ T792] power_supply_get_property+0x40/0x6c
[54151.899567][ T792] power_supply_show_property+0x8c/0x28c
查看进程栈,多个进程处于D状态
2.2 task:irq/252-usbin-p
[53888.395234][T26938] task:irq/252-usbin-p state:D stack:0 pid:959 ppid:2 flags:0x00000008
[53888.395237][T26938] Call trace:
[53888.395238][T26938] __switch_to+0x164/0x308
[53888.395242][T26938] __schedule+0x624/0xa10
[53888.395245][T26938] schedule+0x84/0xf0
[53888.395248][T26938] schedule_preempt_disabled+0x28/0x44
[53888.395251][T26938] __mutex_lock+0x414/0xdec
[53888.395253][T26938] __mutex_lock_slowpath+0x14/0x24
[53888.395255][T26938] mutex_lock+0x40/0xec
[53888.395256][T26938] vote+0x58/0x32c [qpnp_smb5_main]
[53888.395287][T26938] smb5_usb_plugin_irq_handler+0x164/0xdc8 [qpnp_smb5_main]
[53888.395307][T26938] irq_thread_fn+0x44/0xa4
[53888.395311][T26938] irq_thread+0x164/0x290
[53888.395314][T26938] kthread+0x10c/0x154
[53888.395317][T26938] ret_from_fork+0x10/0x20
smb5_usb_plugin_irq_handler
-> vote
-> mutex_lock /////尝试加锁,发现锁已被另一个线程持有
task:irq/252-usbin-p
卡在 __mutex_lock() 上,且没有被调度(因为 state = D 是不可中断等待)

尝试加锁失败,查看锁的拥有者为kworker/1:3

2.3 kworker/1:3
Task name: kworker/1:3 [affinity: 0x2] pid: 24305 tgid: 24305 cpu: 1 prio: 120 start: 0xffffff80992ab840
state: 0x2[D] exit_state: 0x0 stack base: 0xffffffc017768000
Last_enqueued_ts: 45290.225652949 Last_sleep_ts: 45290.225982532
Stack:
[<ffffffc0090416e0>] __switch_to+0x164
[<ffffffc009041ee4>] __schedule+0x624
[<ffffffc009042354>] schedule+0x84
[<ffffffc009042430>] schedule_preempt_disabled+0x28
[<ffffffc009044564>] __mutex_lock+0x414
[<ffffffc009043744>] __mutex_lock_slowpath+0x14
[<ffffffc009043684>] mutex_lock[jt]+0x70
[<ffffffc003567d34>] rerun_election[qpnp_smb5_main]+0x24
[<ffffffc00354c060>] pl_disable_vote_callback[qpnp_smb5_main]+0x2dc
[<ffffffc003567db4>] rerun_election[qpnp_smb5_main]+0xa4
[<ffffffc00354b868>] pl_fcc_vote_callback[qpnp_smb5_main]+0x1b8
[<ffffffc003567db4>] rerun_election[qpnp_smb5_main]+0xa4
[<ffffffc003548d28>] status_change_work[qpnp_smb5_main]+0x38
[<ffffffc0080e71d4>] process_one_work+0x240
[<ffffffc0080e7754>] worker_thread+0x264
[<ffffffc0080ee6c4>] kthread+0x10c
[<ffffffc00801491c>] ret_from_fork+0x10
status_change_work
--> rerun_election
-> pl_fcc_vote_callback //调用fcc_vote的callback函数
-> rerun_election
-> pl_disable_vote_callback //调用disable_vote 的callback函数
-> rerun_election
-> mutex_lock // 尝试加锁
同样卡在 mutex_lock上,

查看锁的拥有者为kworker/0:3

2.4 kworker/0:3
Task name: kworker/0:3 [affinity: 0x1] pid: 22483 tgid: 22483 cpu: 0 prio: 120 start: 0xffffff816aedcb00
state: 0x2[D] exit_state: 0x0 stack base: 0xffffffc017758000
Last_enqueued_ts: 45304.455460857 Last_sleep_ts: 45304.471351690
Stack:
[<ffffffc0090416e0>] __switch_to+0x164
[<ffffffc009041ee4>] __schedule+0x624
[<ffffffc009042354>] schedule+0x84
[<ffffffc009042430>] schedule_preempt_disabled+0x28
[<ffffffc009044564>] __mutex_lock+0x414
[<ffffffc009043744>] __mutex_lock_slowpath+0x14
[<ffffffc009043684>] mutex_lock[jt]+0x70
[<ffffffc003567804>] vote[qpnp_smb5_main]+0x58
[<ffffffc00354bb70>] usb_icl_vote_callback[qpnp_smb5_main]+0x54
[<ffffffc003567db4>] rerun_election[qpnp_smb5_main]+0xa4
[<ffffffc003557f7c>] smblib_get_prop_batt_status[qpnp_smb5_main][jt]+0x26c
[<ffffffc003554414>] smb5_batt_get_prop[qpnp_smb5_main][jt]+0x1f0
[<ffffffc008b3f09c>] power_supply_get_property[jt]+0x58
[<ffffffc00354ba68>] pl_fv_vote_callback[qpnp_smb5_main]+0x12c
[<ffffffc003567ac8>] vote[qpnp_smb5_main]+0x31c
[<ffffffc003547590>] status_change_work[qpnp_smb5_main][jt]+0xb34
[<ffffffc0080e71d4>] process_one_work+0x240
[<ffffffc0080e7834>] worker_thread+0x344
[<ffffffc0080ee6c4>] kthread+0x10c
[<ffffffc00801491c>] ret_from_fork+0x10
status_change_work
--> vote
-> pl_fv_vote_callback //vote中调用fv_vote的callback
-> power_supply_get_property
-> smb5_batt_get_prop
->smblib_get_prop_batt_status
-> rerun_election // 调用rerun_election
-> usb_icl_vote_callback // 调用 icl_vote的callback
-> vote // callback函数里调用vote
-> mutex_lock // 尝试加锁

查看锁的拥有者kworker/1:3

三、结论
死锁完整链路
- 线程irq/252-usbin-p
smb5_usb_plugin_irq_handler()
└── vote()
    └── 尝试获取 votable(FCC) 的 mutex,但该锁已被 kworker/1:3 持有,陷入D状态等待
- 线程kworker/1:3(按栈回溯的调用顺序)
status_change_work
--> rerun_election //// 持有 votable(FCC) 的 mutex 锁
--> pl_fcc_vote_callback
--> rerun_election //// 持有 votable(PL_DISABLE) 的 mutex 锁
--> pl_disable_vote_callback
--> rerun_election ///// 试图获取 votable(FV) 的 mutex 锁
- 线程kworker/1:3已经持有了votable(FCC)和votable(PL_DISABLE)的mutex锁
- 然后在回调链中请求获取votable(FV)的mutex锁,而该锁被kworker/0:3持有
- 线程kworker/0:3
status_change_work
--> vote ///// 持有 votable(FV) 的 mutex 锁
--> pl_fv_vote_callback
--> power_supply_get_property
... // 中间经 rerun_election 又持有 votable(USB_ICL) 的 mutex 锁
--> vote //// 试图获取 votable(PL_DISABLE) 的 mutex 锁,但该锁被线程kworker/1:3持有
总结
irq/252-usbin-p 等待 votable(FCC),该锁被 kworker/1:3 持有
kworker/1:3 持有 votable(FCC) 与 votable(PL_DISABLE),等待 votable(FV)
kworker/0:3 持有 votable(FV) 与 votable(USB_ICL),等待 votable(PL_DISABLE)
kworker/1:3 与 kworker/0:3 互相等待对方持有的锁,形成等待环路
这是一个很典型的AB-BA死锁
四、源码
4.1 irq/252-usbin-p源码
smb5_usb_plugin_irq_handler
irqreturn_t smb5_usb_plugin_irq_handler(int irq, void *data)
{
struct smb_irq_data *irq_data = data;
struct smb_charger *chg = irq_data->parent_data;
if (chg->pd_hard_reset)
smblib_usb_plugin_hard_reset_locked(chg);
else
smblib_usb_plugin_locked(chg);
return IRQ_HANDLED;
}
static void smblib_usb_plugin_locked(struct smb_charger *chg)
{
//...
if (chg->fcc_stepper_enable)
vote(chg->fcc_votable, FCC_STEPPER_VOTER,
true, 1500000);
}
根据栈中的votable的参数可以确认走到这里进行了投票

lock_votable的是 chg->fcc_votable,地址为:0xFFFFFF805601C000,name为FCC
然后准备加锁,发现该锁已被线程kworker/1:3持有,继续查看kworker/1:3
4.2 kworker/1:3源码
static void status_change_work(struct work_struct *work)
{
struct pl_data *chip = container_of(work,
struct pl_data, status_change_work.work);
/*
* re-run election for FCC/FV/ICL to ensure all
* votes are reflected on hardware
*/
rerun_election(chip->usb_icl_votable);
rerun_election(chip->fcc_votable); // 栈里走了pl_fcc_vote_callback,所以走到这儿
rerun_election(chip->fv_votable);
if (!is_batt_available(chip))
return;
is_parallel_available(chip);
handle_usb_change(chip);
handle_main_charge_type(chip);
handle_settled_icl_change(chip);
handle_parallel_in_taper(chip);
}
int rerun_election(struct votable *votable)
{
int rc = 0;
int effective_result;
if (!votable)
return -EINVAL;
lock_votable(votable);
effective_result = get_effective_result_locked(votable);
if (votable->callback) //这里调用chip->fcc_votable的callback函数
rc = votable->callback(votable,
votable->data,
effective_result,
get_client_str(votable, votable->effective_client_id));
unlock_votable(votable);
return rc;
}

地址为:0xFFFFFF805601C000的votable 已在线程kworker/1:3中被持有
rerun_election调用 pl_fcc_vote_callback回调
static int pl_fcc_vote_callback(struct votable *votable, void *data,
int total_fcc_ua, const char *client)
{
//...
rerun_election(chip->pl_disable_votable);
}
pl_fcc_vote_callback继续调用 rerun_election
此时0xFFFFFF805601C000的votable(FCC),还没有unlock_votable的情况下,再次尝试获取 chip->pl_disable_votable,地址为0xFFFFFF805601FC00,name为PL_DISABLE

并在持有这个锁的前提下,调用 pl_disable_vote_callback函数
此时kworker/1:3的持锁情况:
-> 持锁0xFFFFFF805601C000的votable(FCC) -> 持锁0xFFFFFF805601FC00的votable(PL_DISABLE)
继续查看 pl_disable_vote_callback
static int pl_disable_vote_callback(struct votable *votable,
void *data, int pl_disable, const char *client)
{
//...
rerun_election(chip->fv_votable)
//...
}
再次调用rerun_election,尝试对chip->fv_votable加锁

地址为:0xFFFFFF805601E400,name为:FV
然后再尝试加这个锁的时候发现,这个锁被kworker/0:3持有
此时kworker/1:3的持锁情况:
-> 持锁0xFFFFFF805601C000的votable(FCC) -> 持锁0xFFFFFF805601FC00的votable(PL_DISABLE)
-> 想获取0xFFFFFF805601E400的votable(FV) 被 kworker/0:3持锁中
4.3 kworker/0:3
Task name: kworker/0:3 [affinity: 0x1] pid: 22483 tgid: 22483 cpu: 0 prio: 120 start: 0xffffff816aedcb00
state: 0x2[D] exit_state: 0x0 stack base: 0xffffffc017758000
Last_enqueued_ts: 45304.455460857 Last_sleep_ts: 45304.471351690
Stack:
[<ffffffc0090416e0>] __switch_to+0x164
[<ffffffc009041ee4>] __schedule+0x624
[<ffffffc009042354>] schedule+0x84
[<ffffffc009042430>] schedule_preempt_disabled+0x28
[<ffffffc009044564>] __mutex_lock+0x414
[<ffffffc009043744>] __mutex_lock_slowpath+0x14
[<ffffffc009043684>] mutex_lock[jt]+0x70
[<ffffffc003567804>] vote[qpnp_smb5_main]+0x58
[<ffffffc00354bb70>] usb_icl_vote_callback[qpnp_smb5_main]+0x54
[<ffffffc003567db4>] rerun_election[qpnp_smb5_main]+0xa4
[<ffffffc003557f7c>] smblib_get_prop_batt_status[qpnp_smb5_main][jt]+0x26c
[<ffffffc003554414>] smb5_batt_get_prop[qpnp_smb5_main][jt]+0x1f0
[<ffffffc008b3f09c>] power_supply_get_property[jt]+0x58
[<ffffffc00354ba68>] pl_fv_vote_callback[qpnp_smb5_main]+0x12c
[<ffffffc003567ac8>] vote[qpnp_smb5_main]+0x31c
[<ffffffc003547590>] status_change_work[qpnp_smb5_main][jt]+0xb34
[<ffffffc0080e71d4>] process_one_work+0x240
[<ffffffc0080e7834>] worker_thread+0x344
[<ffffffc0080ee6c4>] kthread+0x10c
[<ffffffc00801491c>] ret_from_fork+0x10
static void status_change_work(struct work_struct *work)
{
struct step_chg_info *chip = container_of(work,
struct step_chg_info, status_change_work.work);
int rc = 0;
union power_supply_propval prop = {0, };
if (!is_batt_available(chip) || !is_bms_available(chip))
goto exit_work;
handle_battery_insertion(chip);
rc = config_step_chg_cycle_count(chip);
if (rc < 0)
pr_err("Couldn't config step cycle count rc = %d\n", rc);
/* skip elapsed_us debounce for handling battery temperature */
rc = handle_jeita(chip); /////////////////////////走到这里
if (rc < 0)
pr_err("Couldn't handle sw jeita rc = %d\n", rc);
rc = handle_step_chg_config(chip);
if (rc < 0)
pr_err("Couldn't handle step rc = %d\n", rc);
/* Remove stale votes on USB removal */
if (is_usb_available(chip)) {
prop.intval = 0;
power_supply_get_property(chip->usb_psy,
POWER_SUPPLY_PROP_PRESENT, &prop);
if (!prop.intval) {
if (chip->usb_icl_votable)
vote(chip->usb_icl_votable, JEITA_VOTER,
false, 0);
}
}
exit_work:
__pm_relax(chip->step_chg_ws);
}
static int handle_jeita(struct step_chg_info *chip)
{
//...
vote(chip->fv_votable, JEITA_VOTER, false, 0)
//...
}
持锁 chip->fv_votable,调用callback函数pl_fv_vote_callback

此时kworker/0:3的持锁情况:
-> 0xFFFFFF805601E400的votable(FV) /////而这个正是kworker/1:3线程等待所需要获取的votable锁
继续调用 pl_fv_vote_callback
static int pl_fv_vote_callback(struct votable *votable, void *data,
int fv_uv, const char *client)
{
//...
if ((chip->float_voltage_uv < fv_uv) && is_batt_available(chip)) {
rc = power_supply_get_property(chip->batt_psy,
POWER_SUPPLY_PROP_STATUS, &pval);
//...
}
走到 smb5_batt_get_prop
static int smb5_batt_get_prop(struct power_supply *psy,
enum power_supply_property psp,
union power_supply_propval *pval)
{
struct smb_charger *chg = power_supply_get_drvdata(psy);
int rc = 0;
int ret = 0, ret_charger_status = 0;
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
rc = smblib_get_prop_batt_status(chg, pval); ////根据psp判断走到这里
break;
继续 smblib_get_prop_batt_status
int smblib_get_prop_batt_status(struct smb_charger *chg,
union power_supply_propval *val)
{
//...
rerun_election(chg->usb_icl_votable);
//...
}
调用rerun_election,持锁 chg->usb_icl_votable

地址为:0xFFFFFF805601DC00,name为:USB_ICL
此时kworker/0:3的持锁情况:
-> 0xFFFFFF805601E400的votable(FV) /////而这个正是kworker/1:3线程等待所需要获取的votable锁
-> 0xFFFFFF805601DC00的votable(USB_ICL)
继续执行 chg->usb_icl_votable的回调函数 usb_icl_vote_callback
static int usb_icl_vote_callback(struct votable *votable, void *data,
int icl_ua, const char *client)
{
//...
vote(chip->pl_disable_votable, ICL_CHANGE_VOTER, true, 0);
//...
}

准备持锁名为“PL_DISABLE”,地址为:0xFFFFFF805601FC00的votable
然后发现该锁被kworker/1:3持锁
五、根本原因
此时kworker/0:3的持锁情况:
-> 0xFFFFFF805601E400的votable(FV) /////而这个正是kworker/1:3线程等待所需要获取的votable锁
-> 0xFFFFFF805601DC00的votable(USB_ICL)
{% p red,-> 尝试获取0xFFFFFF805601FC00的votable(PL_DISABLE) %}
我们回顾一下kworker/1:3的持锁情况
此时kworker/1:3的持锁情况:
-> 持锁0xFFFFFF805601C000的votable(FCC)
{% p red,-> 持锁0xFFFFFF805601FC00的votable(PL_DISABLE) %}
-> 想获取0xFFFFFF805601E400的votable(FV) 被 kworker/0:3持锁中
