@check_wrapper(check_title="WRED with Affected FM Models")
def wred_affected_model_check(tversion, fabric_nodes, **kwargs):
    """Report nodes that carry an affected fabric module while WRED is configured.

    Three gates are evaluated in order and the check returns at the first one
    that is not met:

    1. Version gate: the target version is in the 6.1 train at or below
       6.1(5e), or in the 6.2 train below 6.2(2e).
    2. WRED gate: at least one `qosCong` object has `algo` equal to `wred`.
    3. Hardware gate: at least one `eqptFC` object whose dn starts with
       `topology/` reports a model listed in `affected_models`.

    Args:
        tversion: Target version object; a falsy value means no target
            version was supplied.
        fabric_nodes: Iterable of `fabricNode` objects, used only to map a
            node ID to its node name for the report.
        **kwargs: Not used by this check.

    Returns:
        Result: MANUAL when `tversion` is missing; NA when the version is not
        affected or no affected fabric module is found; PASS when WRED is not
        enabled; FAIL_O with one `[node id, node name, model]` row per unique
        (node, model) pair otherwise.
    """
    headers = ["Node ID", "Node Name", "Model"]
    recommended_action = "Disable WRED in fabric or upgrade to a release newer than 6.1(5e) or 6.2(2d)."
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#wred-with-affected-fm-models"
    affected_models = {"N9K-C9504-FM-E", "N9K-C9508-FM-E", "N9K-C9516-FM-E"}

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    # Version gate: each train has its own upper bound, so the train
    # (major1.major2) is matched explicitly before comparing releases.
    version_affected = (
        (
            tversion.major1 == "6"
            and tversion.major2 == "1"
            and (tversion.older_than("6.1(5e)") or tversion.same_as("6.1(5e)"))
        )
        or (
            tversion.major1 == "6"
            and tversion.major2 == "2"
            and tversion.older_than("6.2(2e)")
        )
    )
    if not version_affected:
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    # WRED gate: a single congestion object using WRED is enough.
    wred_enabled = any(
        cong.get("qosCong", {}).get("attributes", {}).get("algo") == "wred"
        for cong in icurl("class", "qosCong.json")
    )
    if not wred_enabled:
        return Result(result=PASS, msg="WRED not enabled.")

    # Only the hardware scan needs node names, so the map is built after the
    # cheaper gates have passed.
    node_name_map = {
        node["fabricNode"]["attributes"]["id"]: node["fabricNode"]["attributes"]["name"]
        for node in fabric_nodes
    }

    # Hardware gate: a chassis can hold several fabric modules of the same
    # model, so rows are de-duplicated on (node id, model) while keeping the
    # order in which they were first seen.
    data = []
    seen = set()
    for obj in icurl("class", "eqptFC.json"):
        attr = obj["eqptFC"]["attributes"]
        model = attr.get("model", "")
        if model not in affected_models:
            continue
        dn = attr.get("dn", "")
        if not dn.startswith("topology/"):
            continue
        dn_match = re.search(node_regex, dn)
        if not dn_match:
            continue
        node_id = dn_match.group("node")
        key = (node_id, model)
        if key in seen:
            continue
        seen.add(key)
        data.append([node_id, node_name_map.get(node_id, ""), model])

    if data:
        return Result(result=FAIL_O, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)

    return Result(result=NA, msg="No affected Fabric module found.")
Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade. +### WRED with Affected FM Models + +Due to [CSCwt50713][72], when WRED (Weighted Random Early Detection) is enabled and specific Fabric Module (FM) hardware models are present in the fabric, the spine switch may crash after moving to an affected ACI release in the 6.1(x) or 6.2(x) range. The crash is specifically triggered by running a tech-support collection or QoS-related commands on the affected spine. + +Affected versions: +6.1(x) releases up to and including 6.1(5e), and 6.2(x) releases earlier than 6.2(2e). + +Affected hardware models: N9K-C9504-FM-E, N9K-C9508-FM-E, N9K-C9516-FM-E. + +To avoid this issue, either disable WRED on the affected nodes, or upgrade to a release newer than 6.1(5e) in the 6.1(x) train or to 6.2(2e) or later in the 6.2(x) train. + + ### BgpProto Timer Policy Already Existing This bug [CSCwt78235][71] validates `F0467` faults where `changeSet` contains 'bgpProt-policy-already-existing'. The fault indicates conflicting BGP protocol timer policy under an L3Outs deployed in same vrf under same node. If this fault is not resolved, l3out will not be programmed properly in the leaf after the clean reboot or the upgrade. 
@@ -2875,3 +2889,4 @@ This bug [CSCwt78235][71] validates `F0467` faults where `changeSet` contains 'b [69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 [70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCvo27498 [71]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt78235 +[72]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt50713 diff --git a/tests/checks/wred_affected_model_check/eqptFC_affected.json b/tests/checks/wred_affected_model_check/eqptFC_affected.json new file mode 100644 index 0000000..4e87550 --- /dev/null +++ b/tests/checks/wred_affected_model_check/eqptFC_affected.json @@ -0,0 +1,10 @@ +[ + { + "eqptFC": { + "attributes": { + "dn": "topology/pod-1/node-1001/sys/ch/fcslot-1/fc", + "model": "N9K-C9508-FM-E" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/eqptFC_duplicate.json b/tests/checks/wred_affected_model_check/eqptFC_duplicate.json new file mode 100644 index 0000000..d121050 --- /dev/null +++ b/tests/checks/wred_affected_model_check/eqptFC_duplicate.json @@ -0,0 +1,18 @@ +[ + { + "eqptFC": { + "attributes": { + "dn": "topology/pod-1/node-1001/sys/ch/fcslot-2/fc", + "model": "N9K-C9508-FM-E" + } + } + }, + { + "eqptFC": { + "attributes": { + "dn": "topology/pod-1/node-1001/sys/ch/fcslot-6/fc", + "model": "N9K-C9508-FM-E" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/eqptFC_empty.json b/tests/checks/wred_affected_model_check/eqptFC_empty.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/tests/checks/wred_affected_model_check/eqptFC_empty.json @@ -0,0 +1 @@ +[] diff --git a/tests/checks/wred_affected_model_check/eqptFC_mixed.json b/tests/checks/wred_affected_model_check/eqptFC_mixed.json new file mode 100644 index 0000000..0e346b0 --- /dev/null +++ b/tests/checks/wred_affected_model_check/eqptFC_mixed.json @@ -0,0 +1,18 @@ +[ + { + "eqptFC": { + "attributes": { + "dn": "topology/pod-1/node-1001/sys/ch/fcslot-1/fc", + "model": "N9K-C9508-FM-E" + } + } + }, + { + 
"eqptFC": { + "attributes": { + "dn": "topology/pod-1/node-1001/sys/ch/fcslot-2/fc", + "model": "N9K-C9504-FM-G" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/fabricNode_spine.json b/tests/checks/wred_affected_model_check/fabricNode_spine.json new file mode 100644 index 0000000..c70eb8f --- /dev/null +++ b/tests/checks/wred_affected_model_check/fabricNode_spine.json @@ -0,0 +1,13 @@ +[ + { + "fabricNode": { + "attributes": { + "dn": "topology/pod-1/node-1001", + "id": "1001", + "name": "spine1001", + "role": "spine", + "model": "N9K-C9504" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/qosCong_mixed.json b/tests/checks/wred_affected_model_check/qosCong_mixed.json new file mode 100644 index 0000000..cb3a900 --- /dev/null +++ b/tests/checks/wred_affected_model_check/qosCong_mixed.json @@ -0,0 +1,16 @@ +[ + { + "qosCong": { + "attributes": { + "algo": "tail-drop" + } + } + }, + { + "qosCong": { + "attributes": { + "algo": "wred" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/qosCong_tail_drop.json b/tests/checks/wred_affected_model_check/qosCong_tail_drop.json new file mode 100644 index 0000000..dd814ec --- /dev/null +++ b/tests/checks/wred_affected_model_check/qosCong_tail_drop.json @@ -0,0 +1,9 @@ +[ + { + "qosCong": { + "attributes": { + "algo": "tail-drop" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/qosCong_wred.json b/tests/checks/wred_affected_model_check/qosCong_wred.json new file mode 100644 index 0000000..1c297c8 --- /dev/null +++ b/tests/checks/wred_affected_model_check/qosCong_wred.json @@ -0,0 +1,9 @@ +[ + { + "qosCong": { + "attributes": { + "algo": "wred" + } + } + } +] diff --git a/tests/checks/wred_affected_model_check/test_wred_affected_model_check.py b/tests/checks/wred_affected_model_check/test_wred_affected_model_check.py new file mode 100644 index 0000000..f3610a9 --- /dev/null +++ b/tests/checks/wred_affected_model_check/test_wred_affected_model_check.py @@ -0,0 
import os
import pytest
import importlib
from helpers.utils import read_data

script = importlib.import_module("aci-preupgrade-validation-script")

dir = os.path.dirname(os.path.abspath(__file__))

test_function = "wred_affected_model_check"

# icurl queries
qosCong_api = "qosCong.json"
eqptFC_api = "eqptFC.json"


@pytest.mark.parametrize(
    "tversion, fabric_nodes, icurl_outputs, expected_result, expected_data",
    [
        # Case 1: Target version not supplied. Expected: MANUAL.
        (
            None,
            read_data(dir, "fabricNode_spine.json"),
            {},
            script.MANUAL,
            [],
        ),
        # Case 2: Target version 6.2(2e) is the first fixed release and not in the affected range.
        # Version gate fails. Expected: NA without any API calls.
        (
            "6.2(2e)",
            read_data(dir, "fabricNode_spine.json"),
            {},
            script.NA,
            [],
        ),
        # Case 3: Target version is outside both the 6.1(x) and 6.2(x) trains.
        # Version gate fails. Expected: NA without any API calls.
        (
            "6.0(9d)",
            read_data(dir, "fabricNode_spine.json"),
            {},
            script.NA,
            [],
        ),
        # Case 4: All 3 gates triggered via an affected FM on a spine node.
        # Version 6.2(1g) is in affected range, WRED is enabled, FM model N9K-C9508-FM-E is affected.
        # Expected: FAIL_O with node 1001 reported.
        (
            "6.2(1g)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_wred.json"),
                eqptFC_api: read_data(dir, "eqptFC_affected.json"),
            },
            script.FAIL_O,
            [["1001", "spine1001", "N9K-C9508-FM-E"]],
        ),
        # Case 5: qosCong objects are mixed (tail-drop first, then wred).
        # A single WRED object is enough to pass the WRED gate.
        # Expected: FAIL_O with node 1001 reported.
        (
            "6.2(1g)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_mixed.json"),
                eqptFC_api: read_data(dir, "eqptFC_affected.json"),
            },
            script.FAIL_O,
            [["1001", "spine1001", "N9K-C9508-FM-E"]],
        ),
        # Case 6: Version is affected but no affected FM hardware found.
        # WRED is enabled so the script proceeds to the FM check, which finds nothing.
        # Hardware gate fails. Expected: NA - issue is model-specific.
        (
            "6.1(5e)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_wred.json"),
                eqptFC_api: read_data(dir, "eqptFC_empty.json"),
            },
            script.NA,
            [],
        ),
        # Case 7: Version is affected and FM is affected, but WRED is not enabled (tail-drop).
        # WRED gate fails. Expected: PASS - confirms all 3 gates must be true simultaneously.
        (
            "6.1(5e)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_tail_drop.json"),
                eqptFC_api: read_data(dir, "eqptFC_affected.json"),
            },
            script.PASS,
            [],
        ),
        # Case 8: Multiple FM objects - one affected (N9K-C9508-FM-E), one unaffected (N9K-C9504-FM-G).
        # WRED is enabled. Only the affected FM should be reported.
        # Expected: FAIL_O with only the affected FM row reported.
        (
            "6.1(5e)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_wred.json"),
                eqptFC_api: read_data(dir, "eqptFC_mixed.json"),
            },
            script.FAIL_O,
            [["1001", "spine1001", "N9K-C9508-FM-E"]],
        ),
        # Case 9: Version is affected, WRED is enabled, but no affected FM models found.
        # FM gate fails. Expected: NA.
        (
            "6.2(1g)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_wred.json"),
                eqptFC_api: read_data(dir, "eqptFC_empty.json"),
            },
            script.NA,
            [],
        ),
        # Case 10: Same node has two FM slots with the same affected model (duplicate eqptFC objects).
        # Deduplication by (node_id, model) must result in only one row.
        # Expected: FAIL_O with a single row for node 1001.
        (
            "6.2(1g)",
            read_data(dir, "fabricNode_spine.json"),
            {
                qosCong_api: read_data(dir, "qosCong_wred.json"),
                eqptFC_api: read_data(dir, "eqptFC_duplicate.json"),
            },
            script.FAIL_O,
            [["1001", "spine1001", "N9K-C9508-FM-E"]],
        ),
    ],
)
def test_logic(run_check, mock_icurl, tversion, fabric_nodes, expected_result, expected_data):
    """Run the check with the parametrized inputs and compare result code and data rows.

    `mock_icurl` is requested for its side effect only; presumably it serves
    the parametrized `icurl_outputs` to the check (fixture defined outside
    this module - confirm in conftest).
    """
    result = run_check(
        tversion=script.AciVersion(tversion) if tversion else None,
        fabric_nodes=fabric_nodes,
    )
    assert result.result == expected_result
    assert result.data == expected_data