Skip to content

Commit fadead8

Browse files
jacob-kelleranguy11
authored andcommitted
ice: fix concurrent reset and removal of VFs
Commit c503e63 ("ice: Stop processing VF messages during teardown") introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is intended to prevent some issues with concurrently handling messages from VFs while tearing down the VFs. This change was motivated by crashes caused while tearing down and bringing up VFs in rapid succession. It turns out that the fix actually introduces issues with the VF driver caused because the PF no longer responds to any messages sent by the VF during its .remove routine. This results in the VF potentially removing its DMA memory before the PF has shut down the device queues. Additionally, the fix doesn't actually resolve concurrency issues within the ice driver. It is possible for a VF to initiate a reset just prior to the ice driver removing VFs. This can result in the remove task concurrently operating while the VF is being reset. This results in similar memory corruption and panics purportedly fixed by that commit. Fix this concurrency at its root by protecting both the reset and removal flows using the existing VF cfg_lock. This ensures that we cannot remove the VF while any outstanding critical tasks such as a virtchnl message or a reset are occurring. This locking change also fixes the root cause originally fixed by commit c503e63 ("ice: Stop processing VF messages during teardown"), so we can simply revert it. Note that I kept these two changes together because simply reverting the original commit alone would leave the driver vulnerable to worse race conditions. Fixes: c503e63 ("ice: Stop processing VF messages during teardown") Signed-off-by: Jacob Keller <jacob.e.keller@intel.com> Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
1 parent 932645c commit fadead8

3 files changed

Lines changed: 27 additions & 18 deletions

File tree

drivers/net/ethernet/intel/ice/ice.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,6 @@ enum ice_pf_state {
280280
ICE_VFLR_EVENT_PENDING,
281281
ICE_FLTR_OVERFLOW_PROMISC,
282282
ICE_VF_DIS,
283-
ICE_VF_DEINIT_IN_PROGRESS,
284283
ICE_CFG_BUSY,
285284
ICE_SERVICE_SCHED,
286285
ICE_SERVICE_DIS,

drivers/net/ethernet/intel/ice/ice_main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,7 +1799,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
17991799
* reset, so print the event prior to reset.
18001800
*/
18011801
ice_print_vf_rx_mdd_event(vf);
1802+
mutex_lock(&pf->vf[i].cfg_lock);
18021803
ice_reset_vf(&pf->vf[i], false);
1804+
mutex_unlock(&pf->vf[i].cfg_lock);
18031805
}
18041806
}
18051807
}

drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -500,8 +500,6 @@ void ice_free_vfs(struct ice_pf *pf)
500500
struct ice_hw *hw = &pf->hw;
501501
unsigned int tmp, i;
502502

503-
set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
504-
505503
if (!pf->vf)
506504
return;
507505

@@ -519,22 +517,26 @@ void ice_free_vfs(struct ice_pf *pf)
519517
else
520518
dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
521519

522-
/* Avoid wait time by stopping all VFs at the same time */
523-
ice_for_each_vf(pf, i)
524-
ice_dis_vf_qs(&pf->vf[i]);
525-
526520
tmp = pf->num_alloc_vfs;
527521
pf->num_qps_per_vf = 0;
528522
pf->num_alloc_vfs = 0;
529523
for (i = 0; i < tmp; i++) {
530-
if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
524+
struct ice_vf *vf = &pf->vf[i];
525+
526+
mutex_lock(&vf->cfg_lock);
527+
528+
ice_dis_vf_qs(vf);
529+
530+
if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
531531
/* disable VF qp mappings and set VF disable state */
532-
ice_dis_vf_mappings(&pf->vf[i]);
533-
set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
534-
ice_free_vf_res(&pf->vf[i]);
532+
ice_dis_vf_mappings(vf);
533+
set_bit(ICE_VF_STATE_DIS, vf->vf_states);
534+
ice_free_vf_res(vf);
535535
}
536536

537-
mutex_destroy(&pf->vf[i].cfg_lock);
537+
mutex_unlock(&vf->cfg_lock);
538+
539+
mutex_destroy(&vf->cfg_lock);
538540
}
539541

540542
if (ice_sriov_free_msix_res(pf))
@@ -570,7 +572,6 @@ void ice_free_vfs(struct ice_pf *pf)
570572
i);
571573

572574
clear_bit(ICE_VF_DIS, pf->state);
573-
clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
574575
clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
575576
}
576577

@@ -1498,6 +1499,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
14981499
ice_for_each_vf(pf, v) {
14991500
vf = &pf->vf[v];
15001501

1502+
mutex_lock(&vf->cfg_lock);
1503+
15011504
vf->driver_caps = 0;
15021505
ice_vc_set_default_allowlist(vf);
15031506

@@ -1512,6 +1515,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
15121515
ice_vf_pre_vsi_rebuild(vf);
15131516
ice_vf_rebuild_vsi(vf);
15141517
ice_vf_post_vsi_rebuild(vf);
1518+
1519+
mutex_unlock(&vf->cfg_lock);
15151520
}
15161521

15171522
if (ice_is_eswitch_mode_switchdev(pf))
@@ -1562,6 +1567,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
15621567
u32 reg;
15631568
int i;
15641569

1570+
lockdep_assert_held(&vf->cfg_lock);
1571+
15651572
dev = ice_pf_to_dev(pf);
15661573

15671574
if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
@@ -2061,9 +2068,12 @@ void ice_process_vflr_event(struct ice_pf *pf)
20612068
bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
20622069
/* read GLGEN_VFLRSTAT register to find out the flr VFs */
20632070
reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
2064-
if (reg & BIT(bit_idx))
2071+
if (reg & BIT(bit_idx)) {
20652072
/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
2073+
mutex_lock(&vf->cfg_lock);
20662074
ice_reset_vf(vf, true);
2075+
mutex_unlock(&vf->cfg_lock);
2076+
}
20672077
}
20682078
}
20692079

@@ -2140,7 +2150,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
21402150
if (!vf)
21412151
return;
21422152

2153+
mutex_lock(&vf->cfg_lock);
21432154
ice_vc_reset_vf(vf);
2155+
mutex_unlock(&vf->cfg_lock);
21442156
}
21452157

21462158
/**
@@ -4625,10 +4637,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
46254637
struct device *dev;
46264638
int err = 0;
46274639

4628-
/* if de-init is underway, don't process messages from VF */
4629-
if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state))
4630-
return;
4631-
46324640
dev = ice_pf_to_dev(pf);
46334641
if (ice_validate_vf_id(pf, vf_id)) {
46344642
err = -EINVAL;

0 commit comments

Comments
 (0)