# Connection Base Check
@check_wrapper(check_title='Sup-A(+) /MNT/PSS Filesystem Check')
def sup_a_filesystem_check(username, password, fabric_nodes, tversion, **kwargs):
    """Flag large files under /mnt/pss/bootlogs on Sup-A / Sup-A+ switches (CSCwq58901).

    Queries `eqptSupC` for supervisors whose model wildcard-matches
    "N9K-SUP-A" (which covers both Sup-A and Sup-A+), opens a Connection to
    every fabric node that owns such a supervisor, runs `du` against
    /mnt/pss/bootlogs/ and reports every file larger than 30 MB.

    Args:
        username: Username assigned to each switch Connection.
        password: Password assigned to each switch Connection.
        fabric_nodes: List of `fabricNode` MOs; the `id`, `name` and
            `address` attributes are read.
        tversion: Target version object exposing `newer_than()`, or a falsy
            value when the target version is unknown.
        **kwargs: Unused.

    Returns:
        Result: MANUAL when `tversion` is missing; NA when `tversion` is newer
        than 6.1(4a) or no matching supervisor exists; ERROR when any switch
        could not be reached or inspected; FAIL_UF when at least one file
        exceeds 30 MB; PASS otherwise.
    """
    result = PASS
    headers = ["Switch ID", "Switch Name", "/mnt/pss Folder usage (MB)", "File Location"]
    data = []
    recommended_action = 'Review The Bug RNE and apply the workaround to remove any unwanted file before Upgrade'
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations#sup-aa-mnt-pss-filesystem'

    if not tversion:
        return Result(result=MANUAL, msg=TVER_MISSING)
    if tversion.newer_than("6.1(4a)"):
        return Result(result=NA, msg='Version not Affected')

    has_error = False
    # Affected model is Sup-A or Sup-A+; the wildcard filter matches both.
    n9k_sup_api = 'eqptSupC.json'
    n9k_sup_api += '?query-target-filter=and(wcard(eqptSupC.model,"N9K-SUP-A"))'
    n9k_sup_mos = icurl('class', n9k_sup_api)
    if not n9k_sup_mos:
        return Result(result=NA, msg="No affected Supervisors found", doc_url=doc_url)

    # A chassis can hold two supervisors, so collect node ids in a set to
    # visit each switch only once. The slot number itself is not needed.
    sup_regex = node_regex + r'/sys/ch/supslot-\d/sup'
    affected_node_ids = set()
    for sup in n9k_sup_mos:
        node_match = re.search(sup_regex, sup["eqptSupC"]["attributes"]["dn"])
        if node_match:
            affected_node_ids.add(node_match.group("node"))

    switches = [
        node for node in fabric_nodes
        if node["fabricNode"]["attributes"]["id"] in affected_node_ids
    ]

    cmd = r"du -ahm /mnt/pss/bootlogs/ | sort -rh | head -15"
    # `du` separates size and path with a tab, so accept any whitespace run.
    # Only entries at least one directory below bootlogs/ are considered,
    # which skips the per-directory totals.
    bootlog_file_regex = re.compile(
        r"(?P<filesize>\d+)\s+/mnt/pss/bootlogs/(?P<filename>\w+/\w+.+)"
    )

    for switch in switches:
        switch_id = switch["fabricNode"]["attributes"]["id"]
        switch_name = switch["fabricNode"]["attributes"]["name"]
        switch_addr = switch["fabricNode"]["attributes"]["address"]

        # NOTE(review): the connection is never explicitly closed here;
        # confirm whether Connection needs/exposes an explicit close.
        try:
            c = Connection(switch_addr)
            c.username = username
            c.password = password
            c.log = LOG_FILE
            c.connect()
        except Exception as e:
            # Record the failure per switch and keep checking the others.
            data.append([switch_id, switch_name, "-", str(e)])
            has_error = True
            continue
        try:
            c.cmd(cmd)
            if "No such file or directory" in c.output:
                data.append([switch_id, switch_name, '/mnt/pss/bootlogs not found', "Check user permissions or retry as 'apic#fallback\\\\admin'"])
                has_error = True
                continue

            for line in c.output.split("\n"):
                # Strip so a trailing "\r" from the terminal is not reported
                # as part of the file name.
                mntpss_usage_match = bootlog_file_regex.match(line.strip())
                if not mntpss_usage_match:
                    continue
                filesize = mntpss_usage_match.group("filesize")
                if int(filesize) > 30:  # More than 30MB per file
                    filename = "/mnt/pss/bootlogs/" + mntpss_usage_match.group("filename")
                    data.append([switch_id, switch_name, filesize, filename])
        except Exception as e:
            data.append([switch_id, switch_name, "-", str(e)])
            has_error = True
            continue

    # An unreachable/uninspectable switch outranks any finding: the result
    # cannot be trusted until every switch has been checked.
    if has_error:
        result = ERROR
    elif data:
        result = FAIL_UF
    return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
---- @@ -6188,6 +6266,7 @@ class CheckManager: standby_sup_sync_check, isis_database_byte_check, configpush_shard_check, + sup_a_filesystem_check, ] ssh_checks = [ diff --git a/docs/docs/validations.md b/docs/docs/validations.md index 68ca1c0..74ab18e 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -193,6 +193,7 @@ Items | Defect | This Script [Stale pconsRA Object][d26] | CSCwp22212 | :warning:{title="Deprecated"} | :no_entry_sign: [ISIS DTEPs Byte Size][d27] | CSCwp15375 | :white_check_mark: | :no_entry_sign: [Policydist configpushShardCont Crash][d28] | CSCwp95515 | :white_check_mark: | +[SUP-A/A+ MNT PSS filesystem][d29] | CSCwq58901 | :white_check_mark: | :no_entry_sign: [d1]: #ep-announce-compatibility [d2]: #eventmgr-db-size-defect-susceptibility @@ -222,6 +223,7 @@ Items | Defect | This Script [d26]: #stale-pconsra-object [d27]: #isis-dteps-byte-size [d28]: #policydist-configpushshardcont-crash +[d29]: #sup-aa-mnt-pss-filesystem ## General Check Details @@ -2648,6 +2650,23 @@ Due to [CSCwp95515][59], upgrading to an affected version while having any `conf If any instances of `configpushShardCont` are flagged by this script, Cisco TAC must be contacted to identify and resolve the underlying issue before performing the upgrade. +### SUP-A/A+ MNT PSS filesystem + +In ACI, the Supervisor models `N9K-SUP-A` and `N9K-SUP-A+` have a 64GB SSD. As a result, the `/mnt/pss` filesystem on these models is only 115M in size. + +Due to [CSCwq58901][62], the `sysmgr.log` files inside this filesystem can grow beyond 30MB, which can easily fill the filesystem. A full `/mnt/pss` filesystem can lead to several issues during upgrade. + +The workaround is to free up space in the `/mnt/pss` filesystem by emptying the contents of the flagged files. + +!!! note "Workaround" + + To empty the contents of a file, run the following command on the affected switch. 
+ + ``` + spine# echo "" > /mnt/pss/bootlogs/1/sysmgr.log + ``` + + [0]: https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script [1]: https://www.cisco.com/c/dam/en/us/td/docs/Website/datacenter/apicmatrix/index.html [2]: https://www.cisco.com/c/en/us/support/switches/nexus-9000-series-switches/products-release-notes-list.html @@ -2710,3 +2729,4 @@ If any instances of `configpushShardCont` are flagged by this script, Cisco TAC [59]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwp95515 [60]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#Inter [61]: https://www.cisco.com/c/en/us/solutions/collateral/data-center-virtualization/application-centric-infrastructure/white-paper-c11-743951.html#EnablePolicyCompression +[62]: https://bst.cloudapps.cisco.com/bugsearch/bug/CSCwq58901 \ No newline at end of file diff --git a/tests/checks/sup_a_filesystem_check/eqptSupC-pos.json b/tests/checks/sup_a_filesystem_check/eqptSupC-pos.json new file mode 100644 index 0000000..340294f --- /dev/null +++ b/tests/checks/sup_a_filesystem_check/eqptSupC-pos.json @@ -0,0 +1,206 @@ +[ + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1201/sys/ch/supslot-1/sup", + "fwVer": "", + "hwVer": "2.0", + "id": "1", + "macB": "50-00-e0-54-25-d0", + "macE": "", + "macL": "18", + "mfgTm": "2025-02-16T19:00:00.000-05:00", + "modTs": "2026-01-20T08:53:36.989-05:00", + "model": "N9K-SUP-B+", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "active", + "rev": "A1", + "ser": "FOC29080EM1", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-20T08:53:36.989-05:00", + "vId": "V03", + "vdrId": "", + "vendor": "Cisco Systems, Inc." 
+ } + } + }, + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1201/sys/ch/supslot-2/sup", + "fwVer": "", + "hwVer": "2.0", + "id": "2", + "macB": "c8-82-34-56-36-36", + "macE": "", + "macL": "18", + "mfgTm": "2025-02-16T19:00:00.000-05:00", + "modTs": "2026-01-20T08:54:59.609-05:00", + "model": "N9K-SUP-B+", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "standby", + "rev": "A1", + "ser": "FOC29080ENC", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-20T08:54:59.609-05:00", + "vId": "V03", + "vdrId": "", + "vendor": "Cisco Systems, Inc." + } + } + }, + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1202/sys/ch/supslot-2/sup", + "fwVer": "", + "hwVer": "2.0", + "id": "2", + "macB": "50-00-e0-8a-09-b0", + "macE": "", + "macL": "18", + "mfgTm": "2025-02-16T19:00:00.000-05:00", + "modTs": "2026-01-20T09:40:37.089-05:00", + "model": "N9K-SUP-B+", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "active", + "rev": "A1", + "ser": "FOC29080ELR", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-20T09:40:37.089-05:00", + "vId": "V03", + "vdrId": "", + "vendor": "Cisco Systems, Inc." 
+ } + } + }, + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1202/sys/ch/supslot-1/sup", + "fwVer": "", + "hwVer": "2.0", + "id": "1", + "macB": "50-00-e0-57-c3-b0", + "macE": "", + "macL": "18", + "mfgTm": "2025-02-16T19:00:00.000-05:00", + "modTs": "2026-01-20T09:42:18.024-05:00", + "model": "N9K-SUP-A+", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "standby", + "rev": "A1", + "ser": "FOC29080ELZ", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-20T09:42:18.024-05:00", + "vId": "V03", + "vdrId": "", + "vendor": "Cisco Systems, Inc." + } + } + }, + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1203/sys/ch/supslot-1/sup", + "fwVer": "", + "hwVer": "1.0", + "id": "1", + "macB": "70-7d-b9-c3-0a-a0", + "macE": "", + "macL": "18", + "mfgTm": "2018-05-06T19:00:00.000-05:00", + "modTs": "2026-01-30T13:47:08.188-05:00", + "model": "N9K-SUP-A+", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "active", + "rev": "C0", + "ser": "FOC22191BTC", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-30T13:47:08.188-05:00", + "vId": "V01", + "vdrId": "", + "vendor": "Cisco Systems, Inc." 
+ } + } + }, + { + "eqptSupC": { + "attributes": { + "childAction": "", + "descr": "Supervisor Module", + "dn": "topology/pod-1/node-1203/sys/ch/supslot-2/sup", + "fwVer": "", + "hwVer": "1.0", + "id": "2", + "macB": "2c-d0-2d-5c-ed-0c", + "macE": "", + "macL": "18", + "mfgTm": "2018-05-13T19:00:00.000-05:00", + "modTs": "2026-01-30T13:49:05.379-05:00", + "model": "N9K-SUP-A", + "monPolDn": "uni/fabric/monfab-default", + "numP": "0", + "operSt": "online", + "partNumber": "", + "pwrSt": "on", + "rdSt": "standby", + "rev": "C0", + "ser": "FOC2220309S", + "status": "", + "swCId": "350", + "swVer": "", + "type": "supervisor", + "upTs": "2026-01-30T13:49:05.379-05:00", + "vId": "V01", + "vdrId": "", + "vendor": "Cisco Systems, Inc." + } + } + } +] \ No newline at end of file diff --git a/tests/checks/sup_a_filesystem_check/fabricNode.json b/tests/checks/sup_a_filesystem_check/fabricNode.json new file mode 100644 index 0000000..c2fd1b1 --- /dev/null +++ b/tests/checks/sup_a_filesystem_check/fabricNode.json @@ -0,0 +1,98 @@ +[ + { + "fabricNode": { + "attributes": { + "adSt": "on", + "address": "10.0.216.66", + "annotation": "", + "apicType": "not-applicable", + "childAction": "", + "delayedHeartbeat": "no", + "dn": "topology/pod-1/node-1201", + "extMngdBy": "", + "fabricSt": "active", + "id": "1201", + "lastStateModTs": "2026-02-04T18:59:27.408-05:00", + "lcOwn": "local", + "mfgTm": "2017-04-09T19:00:00.000-05:00", + "modTs": "2026-02-04T18:59:35.453-05:00", + "model": "N9K-C9504", + "monPolDn": "uni/fabric/monfab-default", + "name": "spine1", + "nameAlias": "", + "nodeType": "unspecified", + "role": "spine", + "serial": "FOX2115PRJJ", + "status": "", + "uid": "0", + "userdom": "all", + "vendor": "Cisco Systems, Inc", + "version": "n9000-16.1(4h)" + } + } + }, + { + "fabricNode": { + "attributes": { + "adSt": "on", + "address": "10.0.216.65", + "annotation": "", + "apicType": "not-applicable", + "childAction": "", + "delayedHeartbeat": "no", + "dn": 
"topology/pod-1/node-1202", + "extMngdBy": "", + "fabricSt": "active", + "id": "1202", + "lastStateModTs": "2026-02-04T18:59:27.468-05:00", + "lcOwn": "local", + "mfgTm": "2017-04-09T19:00:00.000-05:00", + "modTs": "2026-02-04T18:59:35.514-05:00", + "model": "N9K-C9504", + "monPolDn": "uni/fabric/monfab-default", + "name": "spine2", + "nameAlias": "", + "nodeType": "unspecified", + "role": "spine", + "serial": "FOX2115PRK2", + "status": "", + "uid": "0", + "userdom": "all", + "vendor": "Cisco Systems, Inc", + "version": "n9000-16.1(4h)" + } + } + }, + { + "fabricNode": { + "attributes": { + "adSt": "on", + "address": "10.0.240.64", + "annotation": "", + "apicType": "not-applicable", + "childAction": "", + "delayedHeartbeat": "no", + "dn": "topology/pod-1/node-1203", + "extMngdBy": "", + "fabricSt": "active", + "id": "1203", + "lastStateModTs": "2026-02-04T18:59:27.392-05:00", + "lcOwn": "local", + "mfgTm": "2014-05-11T19:00:00.000-05:00", + "modTs": "2026-02-04T18:59:35.436-05:00", + "model": "N9K-C9508", + "monPolDn": "uni/fabric/monfab-default", + "name": "spine3", + "nameAlias": "", + "nodeType": "unspecified", + "role": "spine", + "serial": "FGE18200AVQ", + "status": "", + "uid": "0", + "userdom": "all", + "vendor": "Cisco Systems, Inc", + "version": "n9000-16.1(4h)" + } + } + } +] \ No newline at end of file diff --git a/tests/checks/sup_a_filesystem_check/test_sup_a_filesystem_check.py b/tests/checks/sup_a_filesystem_check/test_sup_a_filesystem_check.py new file mode 100644 index 0000000..597ad3d --- /dev/null +++ b/tests/checks/sup_a_filesystem_check/test_sup_a_filesystem_check.py @@ -0,0 +1,189 @@ +import os +import pytest +import logging +import importlib +from helpers.utils import read_data + +script = importlib.import_module("aci-preupgrade-validation-script") + +log = logging.getLogger(__name__) +dir = os.path.dirname(os.path.abspath(__file__)) + +test_function = "sup_a_filesystem_check" + +n9k_sup_api = 'eqptSupC.json' +n9k_sup_api += 
# icurl query issued by the check; the wildcard matches Sup-A and Sup-A+.
n9k_sup_api = 'eqptSupC.json'
n9k_sup_api += '?query-target-filter=and(wcard(eqptSupC.model,"N9K-SUP-A"))'

fabricNodes = read_data(dir, "fabricNode.json")
# Addresses the mocked Connection is keyed by (every fixture node is a spine).
switch_ips = [
    mo["fabricNode"]["attributes"]["address"]
    for mo in fabricNodes
    if mo["fabricNode"]["attributes"]["role"] == "spine"
]

mntpss_cmd = "du -ahm /mnt/pss/bootlogs/ | sort -rh | head -15"

# No file above the 30 MB threshold.
mntpss_output_neg = """\
5 /mnt/pss/bootlogs/
2 /mnt/pss/bootlogs/1/sysmgr.log
2 /mnt/pss/bootlogs/1
1 /mnt/pss/bootlogs/9/sysmgr.log
1 /mnt/pss/bootlogs/9/nvram_prev_oops_blk_2.log
1 /mnt/pss/bootlogs/9/nvram_prev_oops_blk_1.log
1 /mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_2_inactive.log
1 /mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_1_active.log
1 /mnt/pss/bootlogs/9/isan.log
"""

# Two files above the 30 MB threshold.
mntpss_output_pos = """\
55 /mnt/pss/bootlogs/
32 /mnt/pss/bootlogs/1/sysmgr.log
31 /mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_1_active.log
1 /mnt/pss/bootlogs/9/isan.log
"""

mntpss_output_no_such_file = """\
ls: cannot access /mnt/pss/bootlogs: No such file or directory
spine#
"""


@pytest.mark.parametrize(
    "icurl_outputs, fabric_nodes, tversion, conn_failure, conn_cmds, expected_result, expected_data",
    [
        # MANUAL, no tversion
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            False,
            False,
            [],
            script.MANUAL,
            [],
        ),
        # NA, no supervisor with an affected model
        (
            {
                n9k_sup_api: []
            },
            fabricNodes,
            "6.0(8a)",
            False,
            [],
            script.NA,
            [],
        ),
        # NA, target version not affected
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "6.1(4d)",
            False,
            [],
            script.NA,
            [],
        ),
        # ERROR, connection failure on every switch
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "4.2(4c)",
            True,
            [],
            script.ERROR,
            [
                ["1201", "spine1", "-", "Simulated exception at connect()"],
                ["1202", "spine2", "-", "Simulated exception at connect()"],
                ["1203", "spine3", "-", "Simulated exception at connect()"],
            ],
        ),
        # ERROR, simulated exception while running the `du` command
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "5.2(4l)",
            False,
            {
                switch_ip: [
                    {
                        "cmd": mntpss_cmd,
                        "output": "\n".join([mntpss_cmd, mntpss_output_no_such_file]),
                        "exception": Exception("Simulated exception at `du` command"),
                    }
                ]
                for switch_ip in switch_ips
            },
            script.ERROR,
            [
                ["1201", "spine1", "-", "Simulated exception at `du` command"],
                ["1202", "spine2", "-", "Simulated exception at `du` command"],
                ["1203", "spine3", "-", "Simulated exception at `du` command"],
            ],
        ),
        # ERROR, command succeeds but /mnt/pss/bootlogs is not accessible
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "5.2(4l)",
            False,
            {
                switch_ip: [
                    {
                        "cmd": mntpss_cmd,
                        "output": "\n".join([mntpss_cmd, mntpss_output_no_such_file]),
                        "exception": None,
                    }
                ]
                for switch_ip in switch_ips
            },
            script.ERROR,
            [
                ["1201", "spine1", "/mnt/pss/bootlogs not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
                ["1202", "spine2", "/mnt/pss/bootlogs not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
                ["1203", "spine3", "/mnt/pss/bootlogs not found", "Check user permissions or retry as 'apic#fallback\\\\admin'"],
            ],
        ),
        # PASS, affected models but no bootlogs file is larger than 30 MB
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "5.2(4c)",
            False,
            {
                switch_ip: [
                    {
                        "cmd": mntpss_cmd,
                        "output": "\n".join([mntpss_cmd, mntpss_output_neg]),
                        "exception": None,
                    }
                ]
                for switch_ip in switch_ips
            },
            script.PASS,
            [],
        ),
        # FAIL_UF, affected models with bootlogs files larger than 30 MB
        (
            {
                n9k_sup_api: read_data(dir, "eqptSupC-pos.json")
            },
            fabricNodes,
            "5.2(4p)",
            False,
            {
                switch_ip: [
                    {
                        "cmd": mntpss_cmd,
                        "output": "\n".join([mntpss_cmd, mntpss_output_pos]),
                        "exception": None,
                    }
                ]
                for switch_ip in switch_ips
            },
            script.FAIL_UF,
            [
                ["1201", "spine1", "32", "/mnt/pss/bootlogs/1/sysmgr.log"],
                ["1201", "spine1", "31", "/mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_1_active.log"],
                ["1202", "spine2", "32", "/mnt/pss/bootlogs/1/sysmgr.log"],
                ["1202", "spine2", "31", "/mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_1_active.log"],
                ["1203", "spine3", "32", "/mnt/pss/bootlogs/1/sysmgr.log"],
                ["1203", "spine3", "31", "/mnt/pss/bootlogs/9/nvram_prev_dmesg_blk_1_active.log"],
            ],
        ),
    ],
)
def test_logic(run_check, mock_icurl, fabric_nodes, tversion, mock_conn, expected_result, expected_data):
    """Run the check against mocked icurl/Connection data and compare result and rows."""
    result = run_check(
        username="fake_username",
        password="fake_password",
        fabric_nodes=fabric_nodes,
        tversion=script.AciVersion(tversion) if tversion else None,
    )
    assert result.result == expected_result
    assert result.data == expected_data