-
Notifications
You must be signed in to change notification settings - Fork 48
Added pre-upgrade check for defect CSCwt69100 #385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: v4.2.0-dev
Are you sure you want to change the base?
Changes from all commits
a66b043
1b64143
300a70f
d64dcc6
17990b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ | |
| from textwrap import TextWrapper | ||
| from getpass import getpass | ||
| from collections import defaultdict, OrderedDict | ||
| from datetime import datetime | ||
| from datetime import datetime, timedelta | ||
| from argparse import ArgumentParser | ||
| from itertools import chain | ||
| import threading | ||
|
|
@@ -6410,6 +6410,40 @@ def svccore_excessive_data_check(**kwargs): | |
| return Result(result=ERROR, msg="Error occurred while fetching svccore object counts: {}".format(str(e)), doc_url=doc_url) | ||
|
|
||
|
|
||
def _dbgac_start_ts_to_utc(start_ts):
    """Convert an APIC ``startTs`` string into a naive UTC ``datetime``.

    The expected shape is ``YYYY-MM-DDTHH:MM:SS.mmm+HH:MM``. The first 19
    characters carry the wall-clock time; a trailing ``+HH:MM`` / ``-HH:MM``
    (colon optional) carries the offset of that wall-clock time from UTC.
    When no well-formed offset is present the wall-clock time is returned
    unchanged, i.e. it is treated as already being UTC.

    Only ``datetime.strptime`` and ``timedelta`` are used so that the helper
    works on interpreters whose ``strptime`` has no ``%z`` support.

    Raises:
        ValueError: if the leading date/time portion cannot be parsed.
    """
    wall_clock = datetime.strptime(start_ts[:19], "%Y-%m-%dT%H:%M:%S")
    suffix = start_ts[19:]
    # Only look for the sign after the seconds field, so the '-' separators
    # inside the date itself are never mistaken for an offset sign.
    sign_pos = max(suffix.rfind("+"), suffix.rfind("-"))
    if sign_pos == -1:
        return wall_clock
    digits = suffix[sign_pos + 1:].replace(":", "")
    if len(digits) != 4 or not digits.isdigit():
        return wall_clock
    offset = timedelta(hours=int(digits[:2]), minutes=int(digits[2:]))
    if suffix[sign_pos] == "-":
        offset = -offset
    # local time = UTC + offset  =>  UTC = local time - offset
    return wall_clock - offset


@check_wrapper(check_title="Stale dbgacEpgSummaryTask Objects")
def stale_dbgacEpgSummaryTask_check(tversion, **kwargs):
    """Flag dbgacEpgSummaryTask objects stuck in ``processing`` for over 24 hours (CSCwt69100).

    Args:
        tversion: Target version object, or a falsy value when the target
            version is unknown.
        **kwargs: Unused here; accepted for the common check signature.

    Returns:
        Result: ``MANUAL`` when ``tversion`` is missing, ``NA`` when the target
        version is outside the affected ranges (6.1 up to and including
        6.1(5e), 6.2 up to and including 6.2(1g)), ``FAIL_UF`` with one
        ``[DN, Start Time]`` row per stale task, otherwise ``PASS``.
    """
    result = PASS
    headers = ["DN", "Start Time"]
    data = []
    recommended_action = "Contact Cisco TAC to delete the offending dbgacEpgSummaryTask objects before the upgrade. For more details, refer to the workaround in [CSCwt69100](https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100)."
    doc_url = "https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#stale-dbgacepgsummarytask-objects"

    # NOTE(review): open question raised in code review -- does the fix actually
    # stop the crash? If stale objects should be flagged for research anyway,
    # this check could run regardless of tversion. TBD.
    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)

    version_affected = (
        (tversion.major1 == "6" and tversion.major2 == "1" and (tversion.older_than("6.1(5e)") or tversion.same_as("6.1(5e)")))
        or (tversion.major1 == "6" and tversion.major2 == "2" and (tversion.older_than("6.2(1g)") or tversion.same_as("6.2(1g)")))
    )
    if not version_affected:
        return Result(result=NA, msg=VER_NOT_AFFECTED)

    # Both sides of the age comparison are naive datetimes expressed in UTC.
    threshold = datetime.utcnow() - timedelta(hours=24)
    for obj in icurl("class", 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'):
        attr = obj["dbgacEpgSummaryTask"]["attributes"]
        dn = attr.get("dn", "")
        start_ts = attr.get("startTs", "")
        try:
            # Honour the UTC offset carried by startTs instead of discarding it;
            # otherwise the age is off by that offset on non-UTC APICs.
            task_dt = _dbgac_start_ts_to_utc(start_ts)
        except ValueError:
            # Without a parsable start time the task's age cannot be judged;
            # such objects are deliberately skipped rather than reported.
            continue
        # Strictly older than 24 hours; a task started exactly 24h ago passes.
        if task_dt < threshold:
            data.append([dn, start_ts])

    if data:
        result = FAIL_UF
    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
|
|
||
| # ---- Script Execution ---- | ||
|
|
||
|
|
||
|
|
@@ -6581,6 +6615,7 @@ class CheckManager: | |
| rogue_ep_coop_exception_mac_check, | ||
| n9k_c9408_model_lem_count_check, | ||
| inband_management_policy_misconfig_check, | ||
| stale_dbgacEpgSummaryTask_check, | ||
| ] | ||
| ssh_checks = [ | ||
| # General | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -203,6 +203,7 @@ Items | Defect | This Script | |
| [N9K-C9408 with more than 5 N9K-X9400-16W LEMs][d31] | CSCws82819 | :white_check_mark: | :no_entry_sign: | ||
| [Multi-Pod Modular Spine Bootscript File][d32] | CSCwr66848 | :white_check_mark: | :no_entry_sign: | ||
| [Inband Management Policy Misconfiguration][d33]| CSCwd40071 | :white_check_mark: | :no_entry_sign: | ||
| [Stale dbgacEpgSummaryTask Objects][d34] | CSCwt69100 | :white_check_mark: | :no_entry_sign: | ||
|
|
||
| [d1]: #ep-announce-compatibility | ||
| [d2]: #eventmgr-db-size-defect-susceptibility | ||
|
|
@@ -237,6 +238,7 @@ Items | Defect | This Script | |
| [d31]: #n9k-c9408-with-more-than-5-n9k-x9400-16w-lems | ||
| [d32]: #multi-pod-modular-spine-bootscript-file | ||
| [d33]: #inband-management-policy-misconfiguration | ||
| [d34]: #stale-dbgacepgsummarytask-objects | ||
|
|
||
| ## General Check Details | ||
|
|
||
|
|
@@ -2797,6 +2799,12 @@ Administrators may be unable to access or operate the APIC GUI, potentially impa | |
|
|
||
| This check will verify the count of the `svccoreCtrlr` Managed Object and raise an alarm referencing the bug if more than 240 objects are found. Remove the content or objects of `svccoreCtrlr` or `svccoreNode`. Contact Cisco TAC or upgrade to a release containing the fix for CSCws84232 before proceeding with an upgrade. | ||
|
|
||
| ### Stale dbgacEpgSummaryTask Objects | ||
|
|
||
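| Due to [CSCwt69100][70], a stale `dbgacEpgSummaryTask` object stuck in `processing` state with empty content can cause the policymgr process to crash on all APICs during an upgrade or process restart. When the target version is 6.1(5e) or earlier in the 6.1 train, or 6.2(1g) or earlier in the 6.2 train, this check flags every `dbgacEpgSummaryTask` object whose `operSt` is `processing` and whose `startTs` is more than 24 hours old. | ||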
|
|
||
| Contact Cisco TAC to delete the offending `dbgacEpgSummaryTask` objects before the upgrade. For more details, refer to the workaround in [CSCwt69100][70]. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. this needs to be updated with true next steps, "Contact TAC for next steps" |
||
|
|
||
|
|
||
| [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script | ||
| [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html | ||
|
|
@@ -2867,4 +2875,5 @@ This check will verify the count of the `svccoreCtrlr` Managed Object and raise | |
| [66]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwr66848 | ||
| [67]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwh80837 | ||
| [68]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwd40071 | ||
| [69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 | ||
| [69]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCws84232 | ||
| [70]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwt69100 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| [ | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2026-01-14T11:00:00.000+00:00" | ||
| } | ||
| } | ||
| }, | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2026-01-14T12:01:00.000+00:00" | ||
| } | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [ | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2026-01-14T12:00:00.000+00:00" | ||
| } | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| [ | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2024-01-01T00:00:00.000+00:00" | ||
| } | ||
| } | ||
| }, | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_TEST/epgToEpg-EPG_TEST_A_TO_EPG_TEST_B/dstepg-[uni/tn-TN_TEST/ap-AP_TEST/epg-EPG_TEST_B]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2026-01-15T11:30:00.000+00:00" | ||
| } | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [ | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2026-01-15T11:30:00.000+00:00" | ||
| } | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [ | ||
| { | ||
| "dbgacEpgSummaryTask": { | ||
| "attributes": { | ||
| "dn": "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef", | ||
| "operSt": "processing", | ||
| "startTs": "2024-01-01T00:00:00.000+00:00" | ||
| } | ||
| } | ||
| } | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| import os | ||
| import pytest | ||
| import importlib | ||
| from datetime import datetime | ||
| from helpers.utils import read_data | ||
|
|
||
# Load the script under test by its module name; the name contains hyphens,
# so it cannot be brought in with a plain `import` statement.
script = importlib.import_module("aci-preupgrade-validation-script")

# Directory of this test module; passed to read_data() to locate the JSON fixtures.
# NOTE(review): `dir` shadows the builtin of the same name inside this module.
dir = os.path.dirname(os.path.abspath(__file__))

# Name of the check function exercised by this module.
# NOTE(review): presumably consumed by the shared `run_check` fixture -- confirm in conftest.
test_function = "stale_dbgacEpgSummaryTask_check"

# icurl query key
task_api = 'dbgacEpgSummaryTask.json?query-target-filter=eq(dbgacEpgSummaryTask.operSt,"processing")'

# Fixed "now" returned by MockDatetime.utcnow() once the mock_datetime fixture
# is active: 2026-01-15 12:00:00 UTC (naive datetime, no tzinfo attached).
# Stale threshold = 2026-01-14 12:00:00 UTC (24h before fixed now)
# dbgacEpgSummaryTask_stale.json -> startTs 2024-01-01 (way before threshold) -> FAIL_UF
# dbgacEpgSummaryTask_recent.json -> startTs 2026-01-15 11:30 UTC (30 min before fixed now) -> PASS
FIXED_NOW = datetime(2026, 1, 15, 12, 0, 0)
|
|
||
|
|
||
class MockDatetime:
    """Drop-in for the script's ``datetime`` name whose clock is frozen.

    Construction and ``strptime`` are forwarded to the real ``datetime``;
    ``utcnow`` always reports ``FIXED_NOW`` so age calculations are repeatable.
    """

    def __new__(cls, *args, **kwargs):
        # Instantiating the stand-in hands back a genuine datetime object.
        real_instance = datetime(*args, **kwargs)
        return real_instance

    @staticmethod
    def strptime(date_string, format):
        # Parsing is unchanged: delegate straight to the real implementation.
        parsed = datetime.strptime(date_string, format)
        return parsed

    @staticmethod
    def utcnow():
        # The frozen clock: every call yields the same module-level instant.
        return FIXED_NOW
|
|
||
|
|
||
@pytest.fixture
def mock_datetime(monkeypatch):
    """Swap the script's ``datetime`` name for ``MockDatetime`` for the duration of a test."""
    frozen_clock = MockDatetime
    monkeypatch.setattr(script, "datetime", frozen_clock)
|
|
||
|
|
||
# DN of the TN_PROD task; it is the only DN expected in any reported row.
_PROD_TASK_DN = "action/policymgrsubj-[uni/tn-TN_PROD/epgToEpg-EPG_PROD_FE_TO_EPG_PROD_BE/dstepg-[uni/tn-TN_PROD/ap-AP_PROD/epg-EPG_PROD_BE]]/dbgacEpgSummaryTask-ReportODACDef"


def _task_response(json_file):
    """Return the mocked icurl mapping answering the task query with the given fixture file."""
    return {task_api: read_data(dir, json_file)}


@pytest.mark.parametrize(
    "tversion, icurl_outputs, expected_result, expected_data",
    [
        # Case 1: tversion is missing. Expected: MANUAL.
        (None, {}, script.MANUAL, []),
        # Case 2: Target version 6.2(1h) is beyond both affected ranges (6.1(5e) and 6.2(1g)).
        #         The version gate fails. Expected: NA without any API calls.
        ("6.2(1h)", {}, script.NA, []),
        # Case 3: Target version 6.1(5e) is affected, no dbgacEpgSummaryTask objects found.
        #         Expected: PASS.
        (
            "6.1(5e)",
            _task_response("dbgacEpgSummaryTask_empty.json"),
            script.PASS,
            [],
        ),
        # Case 4: Target version 6.1(5e) is affected, one task in processing state whose
        #         startTs is only 30 minutes old (within the 24-hour threshold).
        #         Expected: PASS.
        (
            "6.1(5e)",
            _task_response("dbgacEpgSummaryTask_recent.json"),
            script.PASS,
            [],
        ),
        # Case 5: Target version 6.1(5e) is affected, one task stuck in processing with a
        #         startTs from 2024 (far older than 24 hours).
        #         Expected: FAIL_UF with the offending DN and startTs reported.
        (
            "6.1(5e)",
            _task_response("dbgacEpgSummaryTask_stale.json"),
            script.FAIL_UF,
            [[_PROD_TASK_DN, "2024-01-01T00:00:00.000+00:00"]],
        ),
        # Case 6: Target version 6.2(1g) is affected, two tasks -- one stale (2024), one recent.
        #         Only the stale task should be reported. Expected: FAIL_UF with one row.
        (
            "6.2(1g)",
            _task_response("dbgacEpgSummaryTask_mixed.json"),
            script.FAIL_UF,
            [[_PROD_TASK_DN, "2024-01-01T00:00:00.000+00:00"]],
        ),
        # Case 7: Task started exactly 24 hours ago (startTs == threshold).
        #         Boundary condition: task_dt < threshold is False when equal. Expected: PASS.
        (
            "6.1(5e)",
            _task_response("dbgacEpgSummaryTask_exactly_24h.json"),
            script.PASS,
            [],
        ),
        # Case 8: Two tasks -- one at 25 hours (stale) and one at 23h59m (not stale).
        #         Only the 25h task crosses the threshold. Expected: FAIL_UF with one row.
        (
            "6.1(5e)",
            _task_response("dbgacEpgSummaryTask_boundary_combo.json"),
            script.FAIL_UF,
            [[_PROD_TASK_DN, "2026-01-14T11:00:00.000+00:00"]],
        ),
    ],
)
def test_logic(run_check, mock_icurl, mock_datetime, tversion, icurl_outputs, expected_result, expected_data):
    """Run the check against each scenario and compare both the verdict and the reported rows."""
    # A missing target version is passed through as None rather than wrapped.
    target = script.AciVersion(tversion) if tversion else None
    outcome = run_check(tversion=target)
    assert outcome.result == expected_result
    assert outcome.data == expected_data
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fix this recommended action, "Contact Cisco TAC"